Load Necessary Libraries

# Function to check and install packages if not already installed
install_if_missing <- function(packages) {
  for (pkg in packages) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg, dependencies = TRUE)
    }
    library(pkg, character.only = TRUE)
  }
}

# List of necessary packages
packages <- c("RSQLite", "ggplot2", "tidyr", "dplyr", "plotly", "sf", "viridis", "rmapshaper", "rlang", "scales", "gridExtra")

# Install and load the necessary packages
install_if_missing(packages)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
## Loading required package: viridisLite
## 
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

Initial Data Preparation

# Only run the next codes if being run for the first time
# Adjusted path for the database
# con <- dbConnect(RSQLite::SQLite(), "/Users/mellogwayo/Desktop/Household Financial Health Analytics Platform/database/variables for analysis.db")
# 'data' is the table name within the SQLite database
# df <- dbReadTable(con, "data")
# Close the connection
# dbDisconnect(con)
# Original variables CSV file path updated
# write.csv(df, "/Users/mellogwayo/Desktop/Household Financial Health Analytics Platform/data/processed/variables.csv", row.names = FALSE)

Load Data

# Loading the data from the CSV file into a DataFrame
data <- read.csv("/Users/mellogwayo/Desktop/Household Financial Health Analytics Platform/data/processed/transformed_variables.csv")

# View the first few rows of the data to confirm it's loaded correctly
head(data)
##       pidp jkl_hidp j_hidp k_hidp   l_hidp jkl_pno jkl_mnpno jkl_fnpno
## 1 68008847 68013622    -14    -14 68013622       1 not in hh not in hh
## 2 68010887 68027222    -14    -14 68027222       1 not in hh not in hh
## 3 68068082 68027222    -14    -14 68027222       2 not in hh not in hh
## 4 68031967 68081622    -14    -14 68081622       1 not in hh not in hh
## 5 68036727 68095222    -14    -14 68095222       1 not in hh not in hh
## 6 68037407 68102022    -14    -14 68102022       1 not in hh not in hh
##      jkl_mnpid    jkl_fnpid jkl_mnspno jkl_fnspno   jkl_mnspid   jkl_fnspid
## 1 inapplicable inapplicable  not in hh  not in hh inapplicable inapplicable
## 2 inapplicable inapplicable  not in hh  not in hh inapplicable inapplicable
## 3 inapplicable inapplicable  not in hh  not in hh inapplicable inapplicable
## 4 inapplicable inapplicable  not in hh  not in hh inapplicable inapplicable
## 5 inapplicable inapplicable  not in hh  not in hh inapplicable inapplicable
## 6 inapplicable inapplicable  not in hh  not in hh inapplicable inapplicable
##   jkl_grmpno jkl_grfpno jkl_childpno jkl_country    jkl_gor_dv jkl_urban_dv
## 1  not in hh  not in hh inapplicable     England    North East   urban area
## 2  not in hh  not in hh inapplicable     England    North East   urban area
## 3  not in hh  not in hh inapplicable     England    North East   urban area
## 4  not in hh  not in hh inapplicable     England West Midlands   urban area
## 5  not in hh  not in hh inapplicable     England    North East   urban area
## 6  not in hh  not in hh inapplicable     England    North East   urban area
##   jkl_sex_dv                                 jkl_mastat_dv
## 1     Female                                       Widowed
## 2     Female                                       Married
## 3       Male                                       Married
## 4     Female                                       Widowed
## 5       Male                                       Widowed
## 6     Female Single and never married/in civil partnership
##               jkl_jbstat                                   jkl_ethn_dv
## 1 Paid employment(ft/pt) british/english/scottish/welsh/northern irish
## 2 Paid employment(ft/pt) british/english/scottish/welsh/northern irish
## 3          Self employed british/english/scottish/welsh/northern irish
## 4                Retired british/english/scottish/welsh/northern irish
## 5                Retired british/english/scottish/welsh/northern irish
## 6 Paid employment(ft/pt) british/english/scottish/welsh/northern irish
##    bornuk_dv                           jkl_jbsoc00_cc
## 1 born in uk      Administrative occupations: general
## 2 born in uk Healthcare and related personal services
## 3 born in uk                      Construction trades
## 4 born in uk                             inapplicable
## 5 born in uk                             inapplicable
## 6 born in uk        Assemblers and routine operatives
##                 jkl_jbnssec8_dv               jkl_jbnssec5_dv jkl_jbnssec3_dv
## 1                  Intermediate                  Intermediate    Intermediate
## 2                  Semi-routine        Semi-routine & routine         Routine
## 3 Small employers & own account Small employers & own account    Intermediate
## 4                  inapplicable                  inapplicable    inapplicable
## 5                  inapplicable                  inapplicable    inapplicable
## 6                       Routine        Semi-routine & routine         Routine
##   jkl_fimnnet_dv jkl_health jkl_nkids_dv
## 1        2288.00        Yes         none
## 2        1200.00        Yes         none
## 3        3275.91         No         none
## 4        1282.17        Yes         none
## 5        1456.17        Yes         none
## 6        1186.31         No         none
##                                                     jkl_hhtype_dv
## 1                                  1 female, age 60+, no children
## 2                  Couple both under pensionable age, no children
## 3                  Couple both under pensionable age, no children
## 4                                  1 female, age 60+, no children
## 5                                   1 male, aged 65+, no children
## 6 2 adults, not a couple, both under pensionable age, no children
##                jkl_tenure_dv jkl_fihhmnnet1_dv
## 1        Owned with mortgage           2288.00
## 2        Owned with mortgage           4475.91
## 3        Owned with mortgage           4475.91
## 4       Local authority rent           1282.17
## 5                    missing           1456.17
## 6 Rented private unfurnished           2446.63

Visualizations

Distribution of Sample Members by Country

ggplot(data, aes(x = jkl_country)) + 
  geom_bar() + 
  labs(title = "Distribution of Sample Members by Country", x = "Country", y = "Count") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Most of the respondents in this study are from England.

Distribution of Members by Region

ggplot(data, aes(x = jkl_gor_dv)) +
  geom_bar() +
  labs(
    title = "Distribution of Members by Region",
    x = "Region",
    y = "Count"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

The South East, London, North west and Scotland were the regions that recorded the most respondents.

The North East had a significantly lower number of respondents.

Employment Status Distribution

# Calculate percentages for the employment status
employment_status <- data %>%
  count(jkl_jbstat) %>%
  mutate(percentage = n / sum(n) * 100)

employment_status
##                                 jkl_jbstat    n  percentage
## 1                     Doing something else  101  0.53046218
## 2                      Family care or home  399  2.09558824
## 3                        Full-time student  117  0.61449580
## 4                     Govt training scheme    2  0.01050420
## 5                      LT sick or disabled  716  3.76050420
## 6                        On apprenticeship    6  0.03151261
## 7                              On furlough  146  0.76680672
## 8                       On maternity leave   15  0.07878151
## 9                   Paid employment(ft/pt) 7527 39.53256303
## 10                                 Retired 8040 42.22689076
## 11                           Self employed 1377  7.23214286
## 12 Temporarily laid off/short term working   19  0.09978992
## 13                              Unemployed  536  2.81512605
## 14                 Unpaid, family business   13  0.06827731
## 15                              don't know    8  0.04201681
## 16                                 refusal   18  0.09453782
ggplot(data, aes(x = "", fill = jkl_jbstat)) + 
  geom_bar(width = 1) + 
  coord_polar(theta = "y") + 
  labs(title = "Employment Status Distribution", x = NULL, y = NULL, fill = "Employment Status") +
  theme_void() +
  theme(legend.title = element_text(size = 12), legend.text = element_text(size = 10))

42% of the respondents are retired whilst 39% of them are in paid employment. Other categories like long-term sickness or disability, family care, and being a student represent smaller but notable percentages of the whole data set.

ggplot(data, aes(x = jkl_tenure_dv)) + 
  geom_bar(fill = "steelblue") + 
  labs(title = "Housing Tenure of Respondents", x = "Housing Tenure", y = "Count") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

A significant proportion of the population in this data set own their homes.

Data Exploration and Feature Engineering

Numerical Variables: Income Distributions

numeric_variables <- c("jkl_fimnnet_dv", "jkl_fihhmnnet1_dv")
summary_statistics_list <- list()

for (var in numeric_variables) {
  summary_statistics <- data %>%
    summarise(
      Mean = mean(.data[[var]], na.rm = TRUE),
      Median = median(.data[[var]], na.rm = TRUE),
      StdDev = sd(.data[[var]], na.rm = TRUE),
      Min = min(.data[[var]], na.rm = TRUE),
      Max = max(.data[[var]], na.rm = TRUE)
    )
  
  print(paste("Summary statistics for", var))
  print(summary_statistics)
  
  summary_statistics_list[[var]] <- summary_statistics
  
  hist_plot <- ggplot(data, aes(x = !!sym(var))) +
    geom_histogram(binwidth = 100, fill = 'blue') +
    labs(title = paste('Histogram of', var), x = var, y = 'Count') +
    scale_x_continuous(limits = c(0, 30000))  # Adjust the limits as needed
  
  print(hist_plot)
}
## [1] "Summary statistics for jkl_fimnnet_dv"
##      Mean  Median   StdDev      Min      Max
## 1 1727.58 1484.97 1596.673 -5589.49 102890.9
## Warning: Removed 9 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

## [1] "Summary statistics for jkl_fihhmnnet1_dv"
##       Mean  Median   StdDev      Min      Max
## 1 3369.379 2830.57 2633.771 -3306.16 104318.9
## Warning: Removed 9 rows containing non-finite outside the scale range (`stat_bin()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

The mean total net personal income (jkl_fimnnet_dv) and the mean household income (jkl_fihhmnnet1_dv) are higher than their medians. This indicates that the distribution might be right-skewed. Incomes on the higher end are pulling the mean upwards.

The large standard deviation (1596.673) relative to the mean suggests there is significant variation in the total net personal income and household income levels.

The income range is very wide, from -5589.49 to 102890.9 for the total net personal income and from -3306.16 to 104318.9 for the household income. This wide range indicates significant disparities in the dataset, with some individuals having very high incomes and others experiencing losses or debts. It also shows that the dataset has extreme values or outliers. There is a diverse set of incomes, including some cases of significant financial difficulties and some cases of substantial wealth.

Calculating Correlation

correlation_matrix <- data %>%
  select(all_of(numeric_variables)) %>% 
  cor(use = "complete.obs")

print(correlation_matrix)
##                   jkl_fimnnet_dv jkl_fihhmnnet1_dv
## jkl_fimnnet_dv         1.0000000         0.6162484
## jkl_fihhmnnet1_dv      0.6162484         1.0000000

There is a moderate to strong positive relationship between personal incomes and household incomes. The relationship is strong enough to suggest that higher personal incomes are associated with higher household incomes.

Categorical Variables

categorical_variables <- c("jkl_country", "jkl_gor_dv", "jkl_urban_dv", "jkl_sex_dv",
                           "jkl_mastat_dv", "jkl_jbstat", "jkl_ethn_dv", "bornuk_dv",
                           "jkl_jbsoc00_cc", "jkl_jbnssec8_dv", "jkl_jbnssec5_dv",
                           "jkl_jbnssec3_dv", "jkl_health", "jkl_nkids_dv", "jkl_hhtype_dv",
                           "jkl_tenure_dv")

data_factors <- data %>%
  mutate(across(all_of(categorical_variables), factor))

for (var in categorical_variables) {
  cat("\nFrequency table for", var, ":\n")
  frequency_counts <- table(data_factors[[var]])
  print(frequency_counts)
  
  proportions <- prop.table(frequency_counts)
  cat("\nProportions for", var, ":\n")
  print(proportions)
  
  bar_plot <- ggplot(data_factors, aes(x = !!sym(var))) +
    geom_bar() +
    labs(title = paste("Distribution of", var), x = var, y = "Count") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  print(bar_plot)
}
## 
## Frequency table for jkl_country :
## 
##          England          missing Northern Ireland         Scotland 
##            14836                5             1146             1770 
##            Wales 
##             1283 
## 
## Proportions for jkl_country :
## 
##          England          missing Northern Ireland         Scotland 
##      0.779201681      0.000262605      0.060189076      0.092962185 
##            Wales 
##      0.067384454

## 
## Frequency table for jkl_gor_dv :
## 
##            East Midlands          East of England                   London 
##                     1453                     1642                     1954 
##                  missing               North East               North West 
##                        5                      728                     1813 
##         Northern Ireland                 Scotland               South East 
##                     1146                     1770                     2425 
##               South West                    Wales            West Midlands 
##                     1702                     1283                     1524 
## Yorkshire and the Humber 
##                     1595 
## 
## Proportions for jkl_gor_dv :
## 
##            East Midlands          East of England                   London 
##              0.076313025              0.086239496              0.102626050 
##                  missing               North East               North West 
##              0.000262605              0.038235294              0.095220588 
##         Northern Ireland                 Scotland               South East 
##              0.060189076              0.092962185              0.127363445 
##               South West                    Wales            West Midlands 
##              0.089390756              0.067384454              0.080042017 
## Yorkshire and the Humber 
##              0.083771008

## 
## Frequency table for jkl_urban_dv :
## 
##    missing rural area urban area 
##         56       5198      13786 
## 
## Proportions for jkl_urban_dv :
## 
##     missing  rural area  urban area 
## 0.002941176 0.273004202 0.724054622

## 
## Frequency table for jkl_sex_dv :
## 
##       Female inconsistent         Male      missing 
##        10404            1         8304          331 
## 
## Proportions for jkl_sex_dv :
## 
##       Female inconsistent         Male      missing 
## 5.464286e-01 5.252101e-05 4.361345e-01 1.738445e-02

## 
## Frequency table for jkl_mastat_dv :
## 
##                        A former civil partner 
##                                            10 
##                     A surviving civil partner 
##                                             9 
##                                      Divorced 
##                                          1713 
##                                    don't know 
##                                            11 
##    In a registered same-sex civil partnership 
##                                           151 
##                              Living as couple 
##                                          1949 
##                                       Married 
##                                         10788 
##                                       missing 
##                                             1 
##                                       refusal 
##                                            34 
##                 Separated but legally married 
##                                           269 
##                  Separated from civil partner 
##                                            22 
## Single and never married/in civil partnership 
##                                          2413 
##                                       Widowed 
##                                          1670 
## 
## Proportions for jkl_mastat_dv :
## 
##                        A former civil partner 
##                                  5.252101e-04 
##                     A surviving civil partner 
##                                  4.726891e-04 
##                                      Divorced 
##                                  8.996849e-02 
##                                    don't know 
##                                  5.777311e-04 
##    In a registered same-sex civil partnership 
##                                  7.930672e-03 
##                              Living as couple 
##                                  1.023634e-01 
##                                       Married 
##                                  5.665966e-01 
##                                       missing 
##                                  5.252101e-05 
##                                       refusal 
##                                  1.785714e-03 
##                 Separated but legally married 
##                                  1.412815e-02 
##                  Separated from civil partner 
##                                  1.155462e-03 
## Single and never married/in civil partnership 
##                                  1.267332e-01 
##                                       Widowed 
##                                  8.771008e-02

## 
## Frequency table for jkl_jbstat :
## 
##                    Doing something else                              don't know 
##                                     101                                       8 
##                     Family care or home                       Full-time student 
##                                     399                                     117 
##                    Govt training scheme                     LT sick or disabled 
##                                       2                                     716 
##                       On apprenticeship                             On furlough 
##                                       6                                     146 
##                      On maternity leave                  Paid employment(ft/pt) 
##                                      15                                    7527 
##                                 refusal                                 Retired 
##                                      18                                    8040 
##                           Self employed Temporarily laid off/short term working 
##                                    1377                                      19 
##                              Unemployed                 Unpaid, family business 
##                                     536                                      13 
## 
## Proportions for jkl_jbstat :
## 
##                    Doing something else                              don't know 
##                            0.0053046218                            0.0004201681 
##                     Family care or home                       Full-time student 
##                            0.0209558824                            0.0061449580 
##                    Govt training scheme                     LT sick or disabled 
##                            0.0001050420                            0.0376050420 
##                       On apprenticeship                             On furlough 
##                            0.0003151261                            0.0076680672 
##                      On maternity leave                  Paid employment(ft/pt) 
##                            0.0007878151                            0.3953256303 
##                                 refusal                                 Retired 
##                            0.0009453782                            0.4222689076 
##                           Self employed Temporarily laid off/short term working 
##                            0.0723214286                            0.0009978992 
##                              Unemployed                 Unpaid, family business 
##                            0.0281512605                            0.0006827731

## 
## Frequency table for jkl_ethn_dv :
## 
##                                       african 
##                                           220 
##                    any other asian background 
##                                           154 
##                    any other black background 
##                                            29 
##                        any other ethnic group 
##                                            67 
##                    any other mixed background 
##                                            63 
##                    any other white background 
##                                           527 
##                                          arab 
##                                            29 
##                                   bangladeshi 
##                                           114 
## british/english/scottish/welsh/northern irish 
##                                         16103 
##                                     caribbean 
##                                           313 
##                                       chinese 
##                                            73 
##                      gypsy or irish traveller 
##                                             4 
##                                        indian 
##                                           530 
##                                         irish 
##                                           212 
##                                       missing 
##                                           154 
##                                     pakistani 
##                                           253 
##                               white and asian 
##                                            65 
##                       white and black african 
##                                            31 
##                     white and black caribbean 
##                                            99 
## 
## Proportions for jkl_ethn_dv :
## 
##                                       african 
##                                   0.011554622 
##                    any other asian background 
##                                   0.008088235 
##                    any other black background 
##                                   0.001523109 
##                        any other ethnic group 
##                                   0.003518908 
##                    any other mixed background 
##                                   0.003308824 
##                    any other white background 
##                                   0.027678571 
##                                          arab 
##                                   0.001523109 
##                                   bangladeshi 
##                                   0.005987395 
## british/english/scottish/welsh/northern irish 
##                                   0.845745798 
##                                     caribbean 
##                                   0.016439076 
##                                       chinese 
##                                   0.003834034 
##                      gypsy or irish traveller 
##                                   0.000210084 
##                                        indian 
##                                   0.027836134 
##                                         irish 
##                                   0.011134454 
##                                       missing 
##                                   0.008088235 
##                                     pakistani 
##                                   0.013287815 
##                               white and asian 
##                                   0.003413866 
##                       white and black african 
##                                   0.001628151 
##                     white and black caribbean 
##                                   0.005199580

## 
## Frequency table for bornuk_dv :
## 
##     born in uk        missing not born in uk 
##          16565            236           2239 
## 
## Proportions for bornuk_dv :
## 
##     born in uk        missing not born in uk 
##     0.87001050     0.01239496     0.11759454

## 
## Frequency table for jkl_jbsoc00_cc :
## 
##                       Administrative occupations: communications 
##                                                               10 
##                              Administrative occupations: finance 
##                                                              239 
##                              Administrative occupations: general 
##                                                              246 
## Administrative occupations: government and related organisations 
##                                                              104 
##                              Administrative occupations: records 
##                                                              161 
##                                              Agricultural trades 
##                                                              102 
##                                             Animal care services 
##                                                               25 
##                             Architects, town planners, surveyors 
##                                                               44 
##                                Artistic and literary occupations 
##                                                               65 
##                                Assemblers and routine operatives 
##                                                               80 
##                                                  Building trades 
##                                                               34 
##                     Business and finance associate professionals 
##                                                              261 
##                           Business and statistical professionals 
##                                                              110 
##                          Childcare and related personal services 
##                                                              251 
##                             Conservation associate professionals 
##                                                               12 
##                                          Construction operatives 
##                                                               30 
##                                              Construction trades 
##                                                              142 
##                          Corporate managers and senior officials 
##                                                               44 
##                                     Customer service occupations 
##                                                              111 
##                                   Design associate professionals 
##                                                               56 
##                                                       don't know 
##                                                               13 
##                          Draughtspersons and building inspectors 
##                                                               19 
##                                                Electrical trades 
##                                                               84 
##                            Elementary administration occupations 
##                                                               76 
##                              Elementary agricultural occupations 
##                                                               18 
##                                  Elementary cleaning occupations 
##                                                              216 
##                              Elementary construction occupations 
##                                                               37 
##                             Elementary goods storage occupations 
##                                                               81 
##                         Elementary personal services occupations 
##                                                              138 
##                             Elementary process plant occupations 
##                                                               49 
##                                     Elementary sales occupations 
##                                                               29 
##                                  Elementary security occupations 
##                                                               97 
##                                        Engineering professionals 
##                                                              108 
##                        Financial institution and office managers 
##                                                              152 
##                                          Food preparation trades 
##                                                               86 
##                                              Functional managers 
##                                                              386 
##                             Hairdressers and related occupations 
##                                                               36 
##                              Health and social services managers 
##                                                               70 
##                                   Health associate professionals 
##                                                              307 
##                                             Health professionals 
##                                                              141 
##                         Healthcare and related personal services 
##                                                              449 
##                                         Housekeeping occupations 
##                                                               41 
##                                                     inapplicable 
##                                                            10154 
##           Information and communication technology professionals 
##                                                              149 
##                                  IT service delivery occupations 
##                                                               81 
##                                    Legal associate professionals 
##                                                               27 
##                                              Legal professionals 
##                                                               61 
##                           Leisure and travel service occupations 
##                                                               45 
##                             Librarians and related professionals 
##                                                               16 
##     Managers and proprietors in hospitality and leisure services 
##                                                               69 
##             Managers and proprietors in other service industries 
##                                                              322 
##                  Managers in distribution, storage and retailing 
##                                                              121 
##         Managers in farming, horticulture, forestry and services 
##                                                               19 
##                                    Media associate professionals 
##                                                               57 
##                        Metal forming, welding and related trades 
##                                                               18 
##            Metal machining, fitting and instrument making trades 
##                                                               91 
##                                                          missing 
##                                                              364 
##                            Mobile machine drivers and operatives 
##                                                               28 
##                                Personal services occupations nec 
##                                                               14 
##                                     Plant and machine operatives 
##                                                               56 
##                                                  Printing trades 
##                                                               12 
##                                               Process operatives 
##                                                               46 
##                                              Production managers 
##                                                              127 
##                                   Protective service occupations 
##                                                               81 
##                                      Protective service officers 
##                                                               11 
##                 Public service and other associate professionals 
##                                                              173 
##                                     Public service professionals 
##                                                               86 
##                               Quality and customer care managers 
##                                                               53 
##                                                          refusal 
##                                                               49 
##                                           Research professionals 
##                                                               54 
##                        Sales and related associate professionals 
##                                                              136 
##                             Sales assistants and retail cashiers 
##                                                              308 
##                                        Sales related occupations 
##                                                               40 
##                              Science and engineering technicians 
##                                                               85 
##                                            Science professionals 
##                                                               33 
##                              Secretarial and related occupations 
##                                                              204 
##                                               Skilled trades nec 
##                                                               26 
##                           Social welfare associate professionals 
##                                                              123 
##                                   Sports and fitness occupations 
##                                                               31 
##                                           Teaching professionals 
##                                                              481 
##                                     Textiles and garments trades 
##                                                               17 
##                                                       Therapists 
##                                                               87 
##                                Transport associate professionals 
##                                                               25 
##                                 Transport drivers and operatives 
##                                                              264 
##                                                   Vehicle trades 
##                                                               36 
## 
## Proportions for jkl_jbsoc00_cc :
## 
##                       Administrative occupations: communications 
##                                                     0.0005252101 
##                              Administrative occupations: finance 
##                                                     0.0125525210 
##                              Administrative occupations: general 
##                                                     0.0129201681 
## Administrative occupations: government and related organisations 
##                                                     0.0054621849 
##                              Administrative occupations: records 
##                                                     0.0084558824 
##                                              Agricultural trades 
##                                                     0.0053571429 
##                                             Animal care services 
##                                                     0.0013130252 
##                             Architects, town planners, surveyors 
##                                                     0.0023109244 
##                                Artistic and literary occupations 
##                                                     0.0034138655 
##                                Assemblers and routine operatives 
##                                                     0.0042016807 
##                                                  Building trades 
##                                                     0.0017857143 
##                     Business and finance associate professionals 
##                                                     0.0137079832 
##                           Business and statistical professionals 
##                                                     0.0057773109 
##                          Childcare and related personal services 
##                                                     0.0131827731 
##                             Conservation associate professionals 
##                                                     0.0006302521 
##                                          Construction operatives 
##                                                     0.0015756303 
##                                              Construction trades 
##                                                     0.0074579832 
##                          Corporate managers and senior officials 
##                                                     0.0023109244 
##                                     Customer service occupations 
##                                                     0.0058298319 
##                                   Design associate professionals 
##                                                     0.0029411765 
##                                                       don't know 
##                                                     0.0006827731 
##                          Draughtspersons and building inspectors 
##                                                     0.0009978992 
##                                                Electrical trades 
##                                                     0.0044117647 
##                            Elementary administration occupations 
##                                                     0.0039915966 
##                              Elementary agricultural occupations 
##                                                     0.0009453782 
##                                  Elementary cleaning occupations 
##                                                     0.0113445378 
##                              Elementary construction occupations 
##                                                     0.0019432773 
##                             Elementary goods storage occupations 
##                                                     0.0042542017 
##                         Elementary personal services occupations 
##                                                     0.0072478992 
##                             Elementary process plant occupations 
##                                                     0.0025735294 
##                                     Elementary sales occupations 
##                                                     0.0015231092 
##                                  Elementary security occupations 
##                                                     0.0050945378 
##                                        Engineering professionals 
##                                                     0.0056722689 
##                        Financial institution and office managers 
##                                                     0.0079831933 
##                                          Food preparation trades 
##                                                     0.0045168067 
##                                              Functional managers 
##                                                     0.0202731092 
##                             Hairdressers and related occupations 
##                                                     0.0018907563 
##                              Health and social services managers 
##                                                     0.0036764706 
##                                   Health associate professionals 
##                                                     0.0161239496 
##                                             Health professionals 
##                                                     0.0074054622 
##                         Healthcare and related personal services 
##                                                     0.0235819328 
##                                         Housekeeping occupations 
##                                                     0.0021533613 
##                                                     inapplicable 
##                                                     0.5332983193 
##           Information and communication technology professionals 
##                                                     0.0078256303 
##                                  IT service delivery occupations 
##                                                     0.0042542017 
##                                    Legal associate professionals 
##                                                     0.0014180672 
##                                              Legal professionals 
##                                                     0.0032037815 
##                           Leisure and travel service occupations 
##                                                     0.0023634454 
##                             Librarians and related professionals 
##                                                     0.0008403361 
##     Managers and proprietors in hospitality and leisure services 
##                                                     0.0036239496 
##             Managers and proprietors in other service industries 
##                                                     0.0169117647 
##                  Managers in distribution, storage and retailing 
##                                                     0.0063550420 
##         Managers in farming, horticulture, forestry and services 
##                                                     0.0009978992 
##                                    Media associate professionals 
##                                                     0.0029936975 
##                        Metal forming, welding and related trades 
##                                                     0.0009453782 
##            Metal machining, fitting and instrument making trades 
##                                                     0.0047794118 
##                                                          missing 
##                                                     0.0191176471 
##                            Mobile machine drivers and operatives 
##                                                     0.0014705882 
##                                Personal services occupations nec 
##                                                     0.0007352941 
##                                     Plant and machine operatives 
##                                                     0.0029411765 
##                                                  Printing trades 
##                                                     0.0006302521 
##                                               Process operatives 
##                                                     0.0024159664 
##                                              Production managers 
##                                                     0.0066701681 
##                                   Protective service occupations 
##                                                     0.0042542017 
##                                      Protective service officers 
##                                                     0.0005777311 
##                 Public service and other associate professionals 
##                                                     0.0090861345 
##                                     Public service professionals 
##                                                     0.0045168067 
##                               Quality and customer care managers 
##                                                     0.0027836134 
##                                                          refusal 
##                                                     0.0025735294 
##                                           Research professionals 
##                                                     0.0028361345 
##                        Sales and related associate professionals 
##                                                     0.0071428571 
##                             Sales assistants and retail cashiers 
##                                                     0.0161764706 
##                                        Sales related occupations 
##                                                     0.0021008403 
##                              Science and engineering technicians 
##                                                     0.0044642857 
##                                            Science professionals 
##                                                     0.0017331933 
##                              Secretarial and related occupations 
##                                                     0.0107142857 
##                                               Skilled trades nec 
##                                                     0.0013655462 
##                           Social welfare associate professionals 
##                                                     0.0064600840 
##                                   Sports and fitness occupations 
##                                                     0.0016281513 
##                                           Teaching professionals 
##                                                     0.0252626050 
##                                     Textiles and garments trades 
##                                                     0.0008928571 
##                                                       Therapists 
##                                                     0.0045693277 
##                                Transport associate professionals 
##                                                     0.0013130252 
##                                 Transport drivers and operatives 
##                                                     0.0138655462 
##                                                   Vehicle trades 
##                                                     0.0018907563

## 
## Frequency table for jkl_jbnssec8_dv :
## 
##                 Higher professional                        inapplicable 
##                                 829                               10148 
##                        Intermediate Large employers & higher management 
##                                1179                                 442 
##     Lower management & professional       Lower supervisory & technical 
##                                2652                                 514 
##                             missing                             Routine 
##                                 440                                 710 
##                        Semi-routine       Small employers & own account 
##                                1291                                 835 
## 
## Proportions for jkl_jbnssec8_dv :
## 
##                 Higher professional                        inapplicable 
##                          0.04353992                          0.53298319 
##                        Intermediate Large employers & higher management 
##                          0.06192227                          0.02321429 
##     Lower management & professional       Lower supervisory & technical 
##                          0.13928571                          0.02699580 
##                             missing                             Routine 
##                          0.02310924                          0.03728992 
##                        Semi-routine       Small employers & own account 
##                          0.06780462                          0.04385504

## 
## Frequency table for jkl_jbnssec5_dv :
## 
##                  inapplicable                  Intermediate 
##                         10148                          1179 
## Lower supervisory & technical     Management & professional 
##                           514                          3923 
##                       missing        Semi-routine & routine 
##                           440                          2001 
## Small employers & own account 
##                           835 
## 
## Proportions for jkl_jbnssec5_dv :
## 
##                  inapplicable                  Intermediate 
##                    0.53298319                    0.06192227 
## Lower supervisory & technical     Management & professional 
##                    0.02699580                    0.20603992 
##                       missing        Semi-routine & routine 
##                    0.02310924                    0.10509454 
## Small employers & own account 
##                    0.04385504

## 
## Frequency table for jkl_jbnssec3_dv :
## 
##              inapplicable              Intermediate Management & professional 
##                     10148                      2014                      3923 
##                   missing                   Routine 
##                       440                      2515 
## 
## Proportions for jkl_jbnssec3_dv :
## 
##              inapplicable              Intermediate Management & professional 
##                0.53298319                0.10577731                0.20603992 
##                   missing                   Routine 
##                0.02310924                0.13209034

## 
## Frequency table for jkl_health :
## 
## don't know         No    refusal        Yes 
##         18      10899         34       8089 
## 
## Proportions for jkl_health :
## 
##   don't know           No      refusal          Yes 
## 0.0009453782 0.5724264706 0.0017857143 0.4248424370

## 
## Frequency table for jkl_nkids_dv :
## 
##  none 
## 19040 
## 
## Proportions for jkl_nkids_dv :
## 
## none 
##    1

## 
## Frequency table for jkl_hhtype_dv :
## 
##                            1 adult under pensionable age, no children 
##                                                                  2210 
##                                                      1 adult, 1 child 
##                                                                    70 
##                                           1 adult, 2 or more children 
##                                                                     9 
##                                        1 female, age 60+, no children 
##                                                                  1931 
##                                         1 male, aged 65+, no children 
##                                                                   767 
##                            2 adults, not a couple, 1 or more children 
##                                                                    55 
##       2 adults, not a couple, both under pensionable age, no children 
##                                                                   467 
## 2 adults, not a couple, one or more over pensionable age, no children 
##                                                                   417 
##               3 or more adults, 1 or more children, excl. any couples 
##                                                                    24 
##             3 or more adults, 1-2 children, incl. at least one couple 
##                                                                   511 
##                      3 or more adults, no children, excl. any couples 
##                                                                   283 
##              3 or more adults, no children, incl. at least one couple 
##                                                                  2979 
##                     Couple 1 or more over pensionable age,no children 
##                                                                  5707 
##                        Couple both under pensionable age, no children 
##                                                                  3293 
##                                                   Couple with 1 child 
##                                                                   260 
##                                                Couple with 2 children 
##                                                                    57 
## 
## Proportions for jkl_hhtype_dv :
## 
##                            1 adult under pensionable age, no children 
##                                                          0.1160714286 
##                                                      1 adult, 1 child 
##                                                          0.0036764706 
##                                           1 adult, 2 or more children 
##                                                          0.0004726891 
##                                        1 female, age 60+, no children 
##                                                          0.1014180672 
##                                         1 male, aged 65+, no children 
##                                                          0.0402836134 
##                            2 adults, not a couple, 1 or more children 
##                                                          0.0028886555 
##       2 adults, not a couple, both under pensionable age, no children 
##                                                          0.0245273109 
## 2 adults, not a couple, one or more over pensionable age, no children 
##                                                          0.0219012605 
##               3 or more adults, 1 or more children, excl. any couples 
##                                                          0.0012605042 
##             3 or more adults, 1-2 children, incl. at least one couple 
##                                                          0.0268382353 
##                      3 or more adults, no children, excl. any couples 
##                                                          0.0148634454 
##              3 or more adults, no children, incl. at least one couple 
##                                                          0.1564600840 
##                     Couple 1 or more over pensionable age,no children 
##                                                          0.2997373950 
##                        Couple both under pensionable age, no children 
##                                                          0.1729516807 
##                                                   Couple with 1 child 
##                                                          0.0136554622 
##                                                Couple with 2 children 
##                                                          0.0029936975

## 
## Frequency table for jkl_tenure_dv :
## 
##       Housing assoc rented       Local authority rent 
##                       1007                       1265 
##                    missing                      Other 
##                        123                         95 
##             Owned outright        Owned with mortgage 
##                       9777                       4918 
##       Rented from employer   Rented private furnished 
##                        150                        299 
## Rented private unfurnished 
##                       1406 
## 
## Proportions for jkl_tenure_dv :
## 
##       Housing assoc rented       Local authority rent 
##                0.052888655                0.066439076 
##                    missing                      Other 
##                0.006460084                0.004989496 
##             Owned outright        Owned with mortgage 
##                0.513497899                0.258298319 
##       Rented from employer   Rented private furnished 
##                0.007878151                0.015703782 
## Rented private unfurnished 
##                0.073844538

cat_vars_summary <- summary(data_factors)
print(cat_vars_summary)
##       pidp              jkl_hidp             j_hidp          
##  Min.   :5.998e+05   Min.   :6.801e+07   Min.   :       -14  
##  1st Qu.:3.675e+08   1st Qu.:3.469e+08   1st Qu.:       -14  
##  Median :7.083e+08   Median :6.873e+08   Median :       -14  
##  Mean   :7.710e+08   Mean   :7.708e+08   Mean   :  40823901  
##  3rd Qu.:1.157e+09   3rd Qu.:1.158e+09   3rd Qu.:       -14  
##  Max.   :1.653e+09   Max.   :1.638e+09   Max.   :1638126818  
##                                                              
##      k_hidp               l_hidp             jkl_pno        jkl_mnpno        
##  Min.   :       -14   Min.   :      -14   Min.   : 1.000   Length:19040      
##  1st Qu.:       -14   1st Qu.:      -14   1st Qu.: 1.000   Class :character  
##  Median :       -14   Median :139022622   Median : 1.000   Mode  :character  
##  Mean   : 485998506   Mean   :243959456   Mean   : 1.361                     
##  3rd Qu.:1090402120   3rd Qu.:478142022   3rd Qu.: 2.000                     
##  Max.   :1637766420   Max.   :823738422   Max.   :12.000                     
##                                                                              
##   jkl_fnpno          jkl_mnpid          jkl_fnpid          jkl_mnspno       
##  Length:19040       Length:19040       Length:19040       Length:19040      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   jkl_fnspno         jkl_mnspid         jkl_fnspid         jkl_grmpno       
##  Length:19040       Length:19040       Length:19040       Length:19040      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   jkl_grfpno        jkl_childpno                 jkl_country   
##  Length:19040       Length:19040       England         :14836  
##  Class :character   Class :character   missing         :    5  
##  Mode  :character   Mode  :character   Northern Ireland: 1146  
##                                        Scotland        : 1770  
##                                        Wales           : 1283  
##                                                                
##                                                                
##            jkl_gor_dv       jkl_urban_dv          jkl_sex_dv   
##  South East     :2425   missing   :   56   Female      :10404  
##  London         :1954   rural area: 5198   inconsistent:    1  
##  North West     :1813   urban area:13786   Male        : 8304  
##  Scotland       :1770                      missing     :  331  
##  South West     :1702                                          
##  East of England:1642                                          
##  (Other)        :7734                                          
##                                        jkl_mastat_dv  
##  Married                                      :10788  
##  Single and never married/in civil partnership: 2413  
##  Living as couple                             : 1949  
##  Divorced                                     : 1713  
##  Widowed                                      : 1670  
##  Separated but legally married                :  269  
##  (Other)                                      :  238  
##                   jkl_jbstat  
##  Retired               :8040  
##  Paid employment(ft/pt):7527  
##  Self employed         :1377  
##  LT sick or disabled   : 716  
##  Unemployed            : 536  
##  Family care or home   : 399  
##  (Other)               : 445  
##                                         jkl_ethn_dv             bornuk_dv    
##  british/english/scottish/welsh/northern irish:16103   born in uk    :16565  
##  indian                                       :  530   missing       :  236  
##  any other white background                   :  527   not born in uk: 2239  
##  caribbean                                    :  313                         
##  pakistani                                    :  253                         
##  african                                      :  220                         
##  (Other)                                      : 1094                         
##                                               jkl_jbsoc00_cc 
##  inapplicable                                        :10154  
##  Teaching professionals                              :  481  
##  Healthcare and related personal services            :  449  
##  Functional managers                                 :  386  
##  missing                                             :  364  
##  Managers and proprietors in other service industries:  322  
##  (Other)                                             : 6884  
##                         jkl_jbnssec8_dv                       jkl_jbnssec5_dv 
##  inapplicable                   :10148   inapplicable                 :10148  
##  Lower management & professional: 2652   Intermediate                 : 1179  
##  Semi-routine                   : 1291   Lower supervisory & technical:  514  
##  Intermediate                   : 1179   Management & professional    : 3923  
##  Small employers & own account  :  835   missing                      :  440  
##  Higher professional            :  829   Semi-routine & routine       : 2001  
##  (Other)                        : 2106   Small employers & own account:  835  
##                   jkl_jbnssec3_dv  jkl_fimnnet_dv          jkl_health   
##  inapplicable             :10148   Min.   : -5589.5   don't know:   18  
##  Intermediate             : 2014   1st Qu.:   937.7   No        :10899  
##  Management & professional: 3923   Median :  1485.0   refusal   :   34  
##  missing                  :  440   Mean   :  1727.6   Yes       : 8089  
##  Routine                  : 2515   3rd Qu.:  2166.7                     
##                                    Max.   :102890.9                     
##                                                                         
##  jkl_nkids_dv                                                  jkl_hhtype_dv 
##  none:19040   Couple 1 or more over pensionable age,no children       :5707  
##               Couple both under pensionable age, no children          :3293  
##               3 or more adults, no children, incl. at least one couple:2979  
##               1 adult under pensionable age, no children              :2210  
##               1 female, age 60+, no children                          :1931  
##               1 male, aged 65+, no children                           : 767  
##               (Other)                                                 :2153  
##                     jkl_tenure_dv  jkl_fihhmnnet1_dv
##  Owned outright            :9777   Min.   : -3306   
##  Owned with mortgage       :4918   1st Qu.:  1772   
##  Rented private unfurnished:1406   Median :  2831   
##  Local authority rent      :1265   Mean   :  3369   
##  Housing assoc rented      :1007   3rd Qu.:  4318   
##  Rented private furnished  : 299   Max.   :104319   
##  (Other)                   : 368

Participants are mainly from England but Northern Ireland, Scotland and Wales were also represented. Most participants were from urban areas. They were more than double those from rural areas. Female participants were the majority although not by a huge difference. Most participants are married. A significant number are divorced, living as a couple single and never married/in civil partnership and widowed. Majority of the participants are either retired or in paid employment. Most participants are British/ English/ Scottish/ Welsh/ Northern Irish. Other races were barely represented. Majority of the respondents were born in the UK. Most respondents had no long standing illness.

The variable jkl_jbsoc00_cc was unclear, so an alternative chart was created. The variables jkl_jbnssec8_dv, jkl_jbnssec5_dv, and jkl_jbnssec3_dv showed that most respondents marked their profession as inapplicable. Therefore, the following plots were created, excluding the inapplicable category from these variables.

# Calculate percentages for the job info
jobinfo <- data %>%
  count(jkl_jbsoc00_cc) %>%
  mutate(percentage = n / sum(n) * 100)

jobinfo
##                                                      jkl_jbsoc00_cc     n
## 1                        Administrative occupations: communications    10
## 2                               Administrative occupations: finance   239
## 3                               Administrative occupations: general   246
## 4  Administrative occupations: government and related organisations   104
## 5                               Administrative occupations: records   161
## 6                                               Agricultural trades   102
## 7                                              Animal care services    25
## 8                              Architects, town planners, surveyors    44
## 9                                 Artistic and literary occupations    65
## 10                                Assemblers and routine operatives    80
## 11                                                  Building trades    34
## 12                     Business and finance associate professionals   261
## 13                           Business and statistical professionals   110
## 14                          Childcare and related personal services   251
## 15                             Conservation associate professionals    12
## 16                                          Construction operatives    30
## 17                                              Construction trades   142
## 18                          Corporate managers and senior officials    44
## 19                                     Customer service occupations   111
## 20                                   Design associate professionals    56
## 21                          Draughtspersons and building inspectors    19
## 22                                                Electrical trades    84
## 23                            Elementary administration occupations    76
## 24                              Elementary agricultural occupations    18
## 25                                  Elementary cleaning occupations   216
## 26                              Elementary construction occupations    37
## 27                             Elementary goods storage occupations    81
## 28                         Elementary personal services occupations   138
## 29                             Elementary process plant occupations    49
## 30                                     Elementary sales occupations    29
## 31                                  Elementary security occupations    97
## 32                                        Engineering professionals   108
## 33                        Financial institution and office managers   152
## 34                                          Food preparation trades    86
## 35                                              Functional managers   386
## 36                             Hairdressers and related occupations    36
## 37                              Health and social services managers    70
## 38                                   Health associate professionals   307
## 39                                             Health professionals   141
## 40                         Healthcare and related personal services   449
## 41                                         Housekeeping occupations    41
## 42                                  IT service delivery occupations    81
## 43           Information and communication technology professionals   149
## 44                                    Legal associate professionals    27
## 45                                              Legal professionals    61
## 46                           Leisure and travel service occupations    45
## 47                             Librarians and related professionals    16
## 48     Managers and proprietors in hospitality and leisure services    69
## 49             Managers and proprietors in other service industries   322
## 50                  Managers in distribution, storage and retailing   121
## 51         Managers in farming, horticulture, forestry and services    19
## 52                                    Media associate professionals    57
## 53                        Metal forming, welding and related trades    18
## 54            Metal machining, fitting and instrument making trades    91
## 55                            Mobile machine drivers and operatives    28
## 56                                Personal services occupations nec    14
## 57                                     Plant and machine operatives    56
## 58                                                  Printing trades    12
## 59                                               Process operatives    46
## 60                                              Production managers   127
## 61                                   Protective service occupations    81
## 62                                      Protective service officers    11
## 63                 Public service and other associate professionals   173
## 64                                     Public service professionals    86
## 65                               Quality and customer care managers    53
## 66                                           Research professionals    54
## 67                        Sales and related associate professionals   136
## 68                             Sales assistants and retail cashiers   308
## 69                                        Sales related occupations    40
## 70                              Science and engineering technicians    85
## 71                                            Science professionals    33
## 72                              Secretarial and related occupations   204
## 73                                               Skilled trades nec    26
## 74                           Social welfare associate professionals   123
## 75                                   Sports and fitness occupations    31
## 76                                           Teaching professionals   481
## 77                                     Textiles and garments trades    17
## 78                                                       Therapists    87
## 79                                Transport associate professionals    25
## 80                                 Transport drivers and operatives   264
## 81                                                   Vehicle trades    36
## 82                                                       don't know    13
## 83                                                     inapplicable 10154
## 84                                                          missing   364
## 85                                                          refusal    49
##     percentage
## 1   0.05252101
## 2   1.25525210
## 3   1.29201681
## 4   0.54621849
## 5   0.84558824
## 6   0.53571429
## 7   0.13130252
## 8   0.23109244
## 9   0.34138655
## 10  0.42016807
## 11  0.17857143
## 12  1.37079832
## 13  0.57773109
## 14  1.31827731
## 15  0.06302521
## 16  0.15756303
## 17  0.74579832
## 18  0.23109244
## 19  0.58298319
## 20  0.29411765
## 21  0.09978992
## 22  0.44117647
## 23  0.39915966
## 24  0.09453782
## 25  1.13445378
## 26  0.19432773
## 27  0.42542017
## 28  0.72478992
## 29  0.25735294
## 30  0.15231092
## 31  0.50945378
## 32  0.56722689
## 33  0.79831933
## 34  0.45168067
## 35  2.02731092
## 36  0.18907563
## 37  0.36764706
## 38  1.61239496
## 39  0.74054622
## 40  2.35819328
## 41  0.21533613
## 42  0.42542017
## 43  0.78256303
## 44  0.14180672
## 45  0.32037815
## 46  0.23634454
## 47  0.08403361
## 48  0.36239496
## 49  1.69117647
## 50  0.63550420
## 51  0.09978992
## 52  0.29936975
## 53  0.09453782
## 54  0.47794118
## 55  0.14705882
## 56  0.07352941
## 57  0.29411765
## 58  0.06302521
## 59  0.24159664
## 60  0.66701681
## 61  0.42542017
## 62  0.05777311
## 63  0.90861345
## 64  0.45168067
## 65  0.27836134
## 66  0.28361345
## 67  0.71428571
## 68  1.61764706
## 69  0.21008403
## 70  0.44642857
## 71  0.17331933
## 72  1.07142857
## 73  0.13655462
## 74  0.64600840
## 75  0.16281513
## 76  2.52626050
## 77  0.08928571
## 78  0.45693277
## 79  0.13130252
## 80  1.38655462
## 81  0.18907563
## 82  0.06827731
## 83 53.32983193
## 84  1.91176471
## 85  0.25735294
# Filter out the 'inapplicable' category
filtered_data <- data %>%
  filter(jkl_jbsoc00_cc != "inapplicable")

# Calculate percentages for the employment status
job <- filtered_data %>%
  count(jkl_jbsoc00_cc) %>%
  mutate(percentage = n / sum(n) * 100)

# Filter the top 20 categories by percentage
top_20_job <- job %>%
  arrange(desc(percentage)) %>%
  slice_head(n = 20)

# Plot the top 20 categories
top_20_plot <- ggplot(top_20_job, aes(x = reorder(jkl_jbsoc00_cc, percentage), y = percentage)) +
  geom_bar(stat = "identity", fill = 'blue') +
  labs(title = 'Top 20 Employment Status Categories', x = 'Employment Status', y = 'Percentage') +
  coord_flip() +
  theme_minimal()

# Print the plot
print(top_20_plot)

The largest group of respondents falls under “Lower management & professional,” comprising just under 30%. “Semi-routine,” “Intermediate,” “Small employers & own account,” “Higher professional,” and “Routine” categories each account for a smaller but still significant proportion of the respondents.”Lower supervisory & technical,” “Large employers & higher management,” and “Missing” categories represent the smallest percentages, each below 10%. This distribution highlights the predominance of lower management and professional roles among respondents, with a diverse spread across other employment statuses.

# Filter out the 'inapplicable' category
filtered_data1 <- data %>%
  filter(jkl_jbnssec8_dv != "inapplicable")

# Calculate percentages for the employment status
sec <- filtered_data1 %>%
  count(jkl_jbnssec8_dv) %>%
  mutate(percentage = n / sum(n) * 100)

# Check the 'sec' data frame
print("SEC data frame after calculating percentages:")
## [1] "SEC data frame after calculating percentages:"
print(sec)
##                       jkl_jbnssec8_dv    n percentage
## 1                 Higher professional  829   9.322987
## 2                        Intermediate 1179  13.259109
## 3 Large employers & higher management  442   4.970760
## 4     Lower management & professional 2652  29.824561
## 5       Lower supervisory & technical  514   5.780477
## 6                             Routine  710   7.984705
## 7                        Semi-routine 1291  14.518668
## 8       Small employers & own account  835   9.390463
## 9                             missing  440   4.948268
# Plot the categories
sec_plot <- ggplot(sec, aes(x = reorder(jkl_jbnssec8_dv, percentage), y = percentage)) +
  geom_bar(stat = "identity", fill = 'blue') +
  labs(
    title = "Classification of an individual's current job based on the National Statistics Socio-economic Classification (NS-SEC) eight-class model",
    x = 'Employment Status', 
    y = 'Percentage'
  ) +
  coord_flip() +
  theme_minimal()

# Print the plot
print(sec_plot)

When looking at the “Classification of an individual’s current job based on the National Statistics Socio-economic Classification (NS-SEC) eight-class model”, the largest group of respondents have “Management & professional” as their employment status. They comprise of just under 45% of the total respondents. “Semi-routine & Routine” represents just above 20% of the population. “Intermediate” accounts for just below 15 % of the total population. “Small employers & own account” represents close to 10% of the population. Whereas “Lower supervisory & technical,” and the “Missing” categories represent the smallest percentages, each below 10%. This distribution indicates that a substantial portion of the respondents occupy lower management and professional roles, while fewer are in higher management or supervisory positions.

# Filter out the 'inapplicable' category
filtered_data2 <- data %>%
  filter(jkl_jbnssec5_dv != "inapplicable")

# Calculate percentages for the employment status
sec5 <- filtered_data2 %>%
  count(jkl_jbnssec5_dv) %>%
  mutate(percentage = n / sum(n) * 100)

# Check the 'sec' data frame
print("Simple SEC data frame after calculating percentages:")
## [1] "Simple SEC data frame after calculating percentages:"
print(sec5)
##                 jkl_jbnssec5_dv    n percentage
## 1                  Intermediate 1179  13.259109
## 2 Lower supervisory & technical  514   5.780477
## 3     Management & professional 3923  44.118309
## 4        Semi-routine & routine 2001  22.503374
## 5 Small employers & own account  835   9.390463
## 6                       missing  440   4.948268
# Plot the categories
sec_plot5 <- ggplot(sec5, aes(x = reorder(jkl_jbnssec5_dv, percentage), y = percentage)) +
  geom_bar(stat = "identity", fill = 'blue') +
  labs(
    title = "Classification of an individual's current job based on the National Statistics Socio-economic Classification (NS-SEC) five-class model",
    x = 'Employment Status', 
    y = 'Percentage'
  ) +
  coord_flip() +
  theme_minimal()

# Print the plot
print(sec_plot5)

Based on the National Statistics Socio-economic Classification (NS-SEC) five-class model, most respondents’ current jobs are categorized as “Lower management & professional,” making up nearly 30% of the total. This is followed by “Semi-routine” and “Routine” jobs, which together account for just over 20%. The “Intermediate” category includes just under 15% of the respondents. About 10% of the population falls under “Small employers & own account.” The “Lower supervisory & technical” and “Missing” categories have the smallest representation, each below 10%. This indicates a significant concentration of respondents in professional and management roles, with fewer in supervisory or technical positions.

# Filter out the 'inapplicable' category
filtered_data3 <- data %>%
  filter(jkl_jbnssec3_dv != "inapplicable")

# Calculate percentages for the employment status
sec3 <- filtered_data3 %>%
  count(jkl_jbnssec3_dv) %>%
  mutate(percentage = n / sum(n) * 100)

# Check the 'sec' data frame
print("Simple SEC data frame after calculating percentages:")
## [1] "Simple SEC data frame after calculating percentages:"
print(sec3)
##             jkl_jbnssec3_dv    n percentage
## 1              Intermediate 2014  22.649573
## 2 Management & professional 3923  44.118309
## 3                   Routine 2515  28.283851
## 4                   missing  440   4.948268
# Plot the categories
sec_plot3 <- ggplot(sec3, aes(x = reorder(jkl_jbnssec3_dv, percentage), y = percentage)) +
  geom_bar(stat = "identity", fill = 'blue') +
  labs(
    title = "Classification of an individual's current job based on the National Statistics Socio-economic Classification (NS-SEC) three-class model",
    x = 'Employment Status', 
    y = 'Percentage'
  ) +
  coord_flip() +
  theme_minimal()

# Print the plot
print(sec_plot3)

The National Statistics Socio-economic Classification (NS-SEC) three-class model reveals that nearly 45% of the population falls under the “Management & professional” category, making it the largest group. Approximately 30% of respondents are in the “Routine” category, while just over 20% are classified as “Intermediate.” The “Missing” category accounts for a small 5% of the population. This distribution highlights a significant concentration of individuals in management and professional roles, with a substantial portion also engaged in routine jobs.

Key Variable Analysis

names(data)
##  [1] "pidp"              "jkl_hidp"          "j_hidp"           
##  [4] "k_hidp"            "l_hidp"            "jkl_pno"          
##  [7] "jkl_mnpno"         "jkl_fnpno"         "jkl_mnpid"        
## [10] "jkl_fnpid"         "jkl_mnspno"        "jkl_fnspno"       
## [13] "jkl_mnspid"        "jkl_fnspid"        "jkl_grmpno"       
## [16] "jkl_grfpno"        "jkl_childpno"      "jkl_country"      
## [19] "jkl_gor_dv"        "jkl_urban_dv"      "jkl_sex_dv"       
## [22] "jkl_mastat_dv"     "jkl_jbstat"        "jkl_ethn_dv"      
## [25] "bornuk_dv"         "jkl_jbsoc00_cc"    "jkl_jbnssec8_dv"  
## [28] "jkl_jbnssec5_dv"   "jkl_jbnssec3_dv"   "jkl_fimnnet_dv"   
## [31] "jkl_health"        "jkl_nkids_dv"      "jkl_hhtype_dv"    
## [34] "jkl_tenure_dv"     "jkl_fihhmnnet1_dv"

The variables in this dataset cover a wide range of demographic, socio-economic, and health-related factors.

Variables that showcase demographics include Household Identifiers (jkl_hidp, j_hidp, k_hidp, l_hidp), Country (jkl_country) & Region (jkl_gor_dv) variables and Ethnic Group (jkl_ethn_dv)

Household Identifiers

The Household Identifiers (jkl_hidp, j_hidp, k_hidp, l_hidp) are crucial for distinguishing between different households and linking data across different waves. Understanding household dynamics is essential for economic analyses, such as household income distribution and the impact of economic policies on households. By linking personal and household identifiers (pidp, jkl_hidp, j_hidp, k_hidp, l_hidp), researchers can track individual and household economic outcomes over time. This relationship is essential for longitudinal studies to understand how personal and household circumstances change and the impact of economic policies over different periods.

# Check uniqueness of personal identifiers
unique_pidp <- length(unique(data$pidp)) == nrow(data)
cat("Are all pidp values unique? ", unique_pidp, "\n")
## Are all pidp values unique?  FALSE
# Check consistency of household identifiers
unique_households <- data %>%
  select(jkl_hidp, j_hidp, k_hidp, l_hidp) %>%
  unique()

cat("Number of unique households: ", nrow(unique_households), "\n")
## Number of unique households:  13242
# Count the number of individuals per household
household_size <- data %>%
  group_by(jkl_hidp) %>%
  summarise(household_count = n())

# Summary statistics of household sizes
summary(household_size$household_count)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   1.000   1.438   2.000   7.000
# Create a frequency table for household sizes
household_size_freq <- household_size %>%
  count(household_count) %>%
  rename(frequency = n)

# Visualize household sizes with a line chart
ggplot(household_size_freq, aes(x = household_count, y = frequency)) +
  geom_line() +
  geom_point() +
  labs(title = "Distribution of Household Sizes", x = "Household Size", y = "Frequency")

The chart above reveals that the most common household size is 2 members, with a frequency just over 6000, followed by 3-member households at slightly above 4000. One-member households also have a significant frequency, though less than 3-member households. Household sizes larger than 3 are much less common, with frequencies quickly tapering off, and larger household sizes (4, 5, 6, etc.) have very low frequencies compared to smaller household sizes. These observations imply a greater demand for smaller housing units and tailored public services to accommodate the predominant household sizes.

Country (jkl_country) and Region (jkl_gor_dv) variables

The Country (jkl_country) and Region (jkl_gor_dv) variables help to analyze geographic disparities in economic conditions. Different regions may have varying levels of economic development, employment opportunities, and living standards, which are critical for regional policy-making and targeted economic interventions.

Urban and rural areas (jkl_urban_dv)

The Urban and rural areas that is showcased by (jkl_urban_dv) often have different economic structures. Urban areas might have more diverse job opportunities and higher living costs, while rural areas might face challenges like limited access to services and employment. This variable is essential for understanding these differences and tailoring economic policies accordingly.

Sex (jkl_sex_dv)

The Sex (derived) (jkl_sex_dv) variable highlights economic disparities between genders. Women often earn less than men due to the gender wage gap and face discrimination in hiring and promotions. They have less access to high-paying STEM fields and professional networks. Additionally, family responsibilities often lead to career interruptions and part-time work for women. Addressing these issues through targeted policies and workplace reforms is crucial for promoting gender equality and reducing economic disparities.

Ethnic Group (jkl_ethn_dv)

Ethnic Group (jkl_ethn_dv) can influence economic opportunities and outcomes due to factors like discrimination, social networks, and access to education. This variable is important for understanding and addressing economic disparities among different ethnic groups.

Employment Status (jkl_jbstat)

Employment Status (jkl_jbstat) is directly linked to income levels, economic stability, and overall well-being. Analyzing employment status can provide insights into labor market trends, unemployment rates, and the effectiveness of job creation programs.

The plots below showcase how employment status varies accross different demographics represented in the study.

key_variable <- "jkl_jbstat"
variables_to_pair <- c("jkl_country", "jkl_urban_dv", "jkl_sex_dv", "jkl_ethn_dv")

for (var in variables_to_pair){count_table <- table(data_factors[[key_variable]], data_factors[[var]])
  
  prop_table <- prop.table(count_table, 1)
  prop_df <- as.data.frame(prop_table)
  
  names(prop_df) <- c("KeyVariable", "PairedVariable", "Proportion")
  
  plot <- ggplot(prop_df, aes(x = KeyVariable, y = Proportion, fill = PairedVariable)) +
    geom_bar(stat = "identity", position = "fill") +
    geom_hline(yintercept = 0.5, linetype = "dashed", color = "red") +
    labs(title = paste("Proportion of", var, "within", key_variable),
         x = key_variable, y = "Proportion") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  print(plot)}

# Calculate the proportions
data_proportion <- data %>%
  group_by(jkl_gor_dv, jkl_jbstat) %>%
  summarise(count = n()) %>%
  mutate(proportion = count / sum(count))
## `summarise()` has grouped output by 'jkl_gor_dv'. You can override using the
## `.groups` argument.
# Create the stacked bar chart
ggplot(data_proportion, aes(x = jkl_gor_dv, y = proportion, fill = jkl_jbstat)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme_minimal() +
  labs(
    title = "Proportion of Employment Status within Regions in the UK",
    x = "Region",
    y = "Proportion",
    fill = "Employment Status"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Change "Yorkshire and the Humber" to "Yorkshire and The Humber" to match the map 
data <- data %>%
  mutate(jkl_gor_dv = ifelse(jkl_gor_dv == "Yorkshire and the Humber", "Yorkshire and The Humber", jkl_gor_dv))

# Define the path to the shapefile
shapefile_path <- "/Users/mellogwayo/Desktop/Household Financial Health Analytics Platform/data/processed/RGN_DEC_2021_EN_BGC.shp"

# Read the shapefile
uk_shapefile <- st_read(shapefile_path)
## Reading layer `RGN_DEC_2021_EN_BGC' from data source 
##   `/Users/mellogwayo/Desktop/Household Financial Health Analytics Platform/data/processed/RGN_DEC_2021_EN_BGC.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 9 features and 7 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 82668.52 ymin: 5352.6 xmax: 655653.8 ymax: 657540.4
## Projected CRS: OSGB36 / British National Grid
# Simplify the geometry
uk_shapefile_simplified <- ms_simplify(uk_shapefile, keep = 0.01) 

# Unique values in the shapefile
cat("Unique values in shapefile (RGN21NM):\n")
## Unique values in shapefile (RGN21NM):
print(unique(uk_shapefile_simplified$RGN21NM))
## [1] "North East"               "North West"              
## [3] "Yorkshire and The Humber" "East Midlands"           
## [5] "West Midlands"            "East of England"         
## [7] "London"                   "South East"              
## [9] "South West"
# Unique values in your data
cat("Unique values in data (jkl_gor_dv):\n")
## Unique values in data (jkl_gor_dv):
print(unique(data$jkl_gor_dv))
##  [1] "North East"               "West Midlands"           
##  [3] "North West"               "Yorkshire and The Humber"
##  [5] "Scotland"                 "East Midlands"           
##  [7] "London"                   "South East"              
##  [9] "South West"               "East of England"         
## [11] "Wales"                    "Northern Ireland"        
## [13] "missing"
# Inspect column names in the shapefile
cat("Column names in the shapefile:\n")
## Column names in the shapefile:
print(names(uk_shapefile_simplified))
## [1] "RGN21CD"  "BNG_N"    "RGN21NM"  "BNG_E"    "LONG"     "LAT"      "GlobalID"
## [8] "geometry"
# Inspect column names in your data
cat("Column names in your data:\n")
## Column names in your data:
print(names(data))
##  [1] "pidp"              "jkl_hidp"          "j_hidp"           
##  [4] "k_hidp"            "l_hidp"            "jkl_pno"          
##  [7] "jkl_mnpno"         "jkl_fnpno"         "jkl_mnpid"        
## [10] "jkl_fnpid"         "jkl_mnspno"        "jkl_fnspno"       
## [13] "jkl_mnspid"        "jkl_fnspid"        "jkl_grmpno"       
## [16] "jkl_grfpno"        "jkl_childpno"      "jkl_country"      
## [19] "jkl_gor_dv"        "jkl_urban_dv"      "jkl_sex_dv"       
## [22] "jkl_mastat_dv"     "jkl_jbstat"        "jkl_ethn_dv"      
## [25] "bornuk_dv"         "jkl_jbsoc00_cc"    "jkl_jbnssec8_dv"  
## [28] "jkl_jbnssec5_dv"   "jkl_jbnssec3_dv"   "jkl_fimnnet_dv"   
## [31] "jkl_health"        "jkl_nkids_dv"      "jkl_hhtype_dv"    
## [34] "jkl_tenure_dv"     "jkl_fihhmnnet1_dv"
# Define the regions present in the shapefile
regions_in_shapefile <- unique(uk_shapefile_simplified$RGN21NM)

# Filter the data to include only the regions present in the shapefile
filtered_data <- data %>%
  filter(jkl_gor_dv %in% regions_in_shapefile)

# Merge simplified shapefile with the filtered data
uk_data_filtered <- uk_shapefile_simplified %>%
  left_join(filtered_data, by = c("RGN21NM" = "jkl_gor_dv"))

# Plot the map
ggplot(data = uk_data_filtered) +
  geom_sf(aes(fill = jkl_jbstat)) +
  scale_fill_viridis_d() +
  theme_minimal() +
  labs(
    title = "Employment Status in Different Regions in the UK",
    fill = "Employment Status"
  )

# Summarize the data to get proportions
data_summary <- data %>%
  group_by(jkl_jbstat, jkl_ethn_dv) %>%
  summarise(count = n(), .groups = 'drop') %>%
  mutate(proportion = count / sum(count))

#Long format for pie chart 
data_long <- data_summary %>%
  select(jkl_jbstat, jkl_ethn_dv, proportion)

# Get unique categories of jkl_jbstat
categories <- unique(data_long$jkl_jbstat)

# Generate and print pie charts for each category
for (category in categories) {
  plot_data <- data_long %>% filter(jkl_jbstat == category)
  
  p <- ggplot(plot_data, aes(x = "", y = proportion, fill = jkl_ethn_dv)) +
    geom_bar(width = 1, stat = "identity") +
    coord_polar("y", start = 0) +
    theme_minimal(base_size = 15) +
    labs(title = paste("Proportion of jkl_ethn_dv within", category),
         fill = "Ethnic Group",
         y = "Proportion",
         x = "") +
    theme(
      axis.text.x = element_blank(),
      axis.ticks = element_blank(),
      plot.title = element_text(size = 16)
    )
  
  # Print the plot
  print(p)
}

Socio-economic Classification

Standard Socio-economic Classification (SOC) (jkl_jbsoc00_cc, jkl_jbnssec8_dv, jkl_jbnssec5_dv, jkl_jbnssec3_dv) provide a detailed look at occupational status and social class. They are crucial for studying social mobility, occupational earnings, and the impact of economic policies on different socio-economic groups.

key_variables <- c("jkl_jbnssec8_dv", "jkl_jbnssec5_dv", "jkl_jbnssec3_dv")
variables_to_pair <- c("jkl_country", "jkl_urban_dv", "jkl_sex_dv", "jkl_ethn_dv")

for (key_variable in key_variables) {
  for (var in variables_to_pair) {
    count_table <- table(data_factors[[key_variable]], data_factors[[var]])
    
    prop_table <- prop.table(count_table, 1)
    prop_df <- as.data.frame(prop_table)
    
    names(prop_df) <- c("KeyVariable", "PairedVariable", "Proportion")
    
    plot <- ggplot(prop_df, aes(x = KeyVariable, y = Proportion, fill = PairedVariable)) +
      geom_bar(stat = "identity", position = "fill") +
      geom_hline(yintercept = 0.5, linetype = "dashed", color = "red") +
      labs(title = paste("Proportion of", var, "within", key_variable),
           x = key_variable, y = "Proportion") +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))
    
    print(plot)
  }
}

# List of key variables
key_variables <- c("jkl_jbnssec8_dv", "jkl_jbnssec5_dv", "jkl_jbnssec3_dv")

# Loop over each key variable
for (key_variable in key_variables) {
  # Filter out 'inapplicable' category and calculate the proportions
  data_proportion <- data %>%
    filter(!!sym(key_variable) != "inapplicable") %>%
    group_by(jkl_gor_dv, !!sym(key_variable)) %>%
    summarise(count = n(), .groups = 'drop') %>%
    mutate(proportion = count / sum(count))
  
  # Create the stacked bar chart
  plot <- ggplot(data_proportion, aes(x = jkl_gor_dv, y = proportion, fill = !!sym(key_variable))) +
    geom_bar(stat = "identity", position = "fill") +
    scale_y_continuous(labels = scales::percent_format()) +
    theme_minimal() +
    labs(
      title = paste("Proportion of", key_variable, "within Regions in the UK"),
      x = "Region",
      y = "Proportion",
      fill = key_variable
    ) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
  
  # Print the plot
  print(plot)
}

# List of key variables
key_variables <- c("jkl_jbnssec8_dv", "jkl_jbnssec5_dv", "jkl_jbnssec3_dv")

# Print unique values and column names for debugging
cat("Unique values in shapefile (RGN21NM):\n")
## Unique values in shapefile (RGN21NM):
print(unique(uk_shapefile_simplified$RGN21NM))
## [1] "North East"               "North West"              
## [3] "Yorkshire and The Humber" "East Midlands"           
## [5] "West Midlands"            "East of England"         
## [7] "London"                   "South East"              
## [9] "South West"
cat("Unique values in data (jkl_gor_dv):\n")
## Unique values in data (jkl_gor_dv):
print(unique(data$jkl_gor_dv))
##  [1] "North East"               "West Midlands"           
##  [3] "North West"               "Yorkshire and The Humber"
##  [5] "Scotland"                 "East Midlands"           
##  [7] "London"                   "South East"              
##  [9] "South West"               "East of England"         
## [11] "Wales"                    "Northern Ireland"        
## [13] "missing"
cat("Column names in the shapefile:\n")
## Column names in the shapefile:
print(names(uk_shapefile_simplified))
## [1] "RGN21CD"  "BNG_N"    "RGN21NM"  "BNG_E"    "LONG"     "LAT"      "GlobalID"
## [8] "geometry"
cat("Column names in your data:\n")
## Column names in your data:
print(names(data))
##  [1] "pidp"              "jkl_hidp"          "j_hidp"           
##  [4] "k_hidp"            "l_hidp"            "jkl_pno"          
##  [7] "jkl_mnpno"         "jkl_fnpno"         "jkl_mnpid"        
## [10] "jkl_fnpid"         "jkl_mnspno"        "jkl_fnspno"       
## [13] "jkl_mnspid"        "jkl_fnspid"        "jkl_grmpno"       
## [16] "jkl_grfpno"        "jkl_childpno"      "jkl_country"      
## [19] "jkl_gor_dv"        "jkl_urban_dv"      "jkl_sex_dv"       
## [22] "jkl_mastat_dv"     "jkl_jbstat"        "jkl_ethn_dv"      
## [25] "bornuk_dv"         "jkl_jbsoc00_cc"    "jkl_jbnssec8_dv"  
## [28] "jkl_jbnssec5_dv"   "jkl_jbnssec3_dv"   "jkl_fimnnet_dv"   
## [31] "jkl_health"        "jkl_nkids_dv"      "jkl_hhtype_dv"    
## [34] "jkl_tenure_dv"     "jkl_fihhmnnet1_dv"
# Define the regions present in the shapefile
regions_in_shapefile <- unique(uk_shapefile_simplified$RGN21NM)

# Filter the data to include only the regions present in the shapefile
filtered_data <- data %>%
  filter(jkl_gor_dv %in% regions_in_shapefile)

# Loop over each key variable
for (key_variable in key_variables) {
  # Remove 'inapplicable' category
  filtered_data_variable <- filtered_data %>%
    filter(!!sym(key_variable) != "inapplicable")
  
  # Merge simplified shapefile with the filtered data for the current key variable
  uk_data_filtered <- uk_shapefile_simplified %>%
    left_join(filtered_data_variable, by = c("RGN21NM" = "jkl_gor_dv"))
  
  # Plot the map for the current key variable
  plot <- ggplot(data = uk_data_filtered) +
    geom_sf(aes_string(fill = key_variable)) +
    scale_fill_viridis_d() +
    theme_minimal() +
    labs(
      title = paste("Proportion of", key_variable, "in Different Regions in the UK"),
      fill = key_variable
    )
  
  # Print the plot
  print(plot)
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# List of key variables for pie charts
key_variables <- c("jkl_jbnssec8_dv", "jkl_jbnssec5_dv", "jkl_jbnssec3_dv")

# Loop over each key variable
for (key_variable in key_variables) {
  # Summarize the data to get proportions
  data_summary <- data %>%
    group_by(!!sym(key_variable), jkl_ethn_dv) %>%
    summarise(count = n(), .groups = 'drop') %>%
    mutate(proportion = count / sum(count))
  
  # Long format for pie chart 
  data_long <- data_summary %>%
    select(!!sym(key_variable), jkl_ethn_dv, proportion)
  
  # Get unique categories of the current key variable
  categories <- unique(data_long[[key_variable]])
  
  # Generate and print pie charts for each category
  for (category in categories) {
    plot_data <- data_long %>% filter((!!sym(key_variable)) == category)
    
    p <- ggplot(plot_data, aes(x = "", y = proportion, fill = jkl_ethn_dv)) +
      geom_bar(width = 1, stat = "identity") +
      coord_polar("y", start = 0) +
      theme_minimal(base_size = 15) +
      labs(
        title = paste("Proportion of jkl_ethn_dv within", category, "of", key_variable),
        fill = "Ethnic Group",
        y = "Proportion",
        x = ""
      ) +
      theme(
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_text(size = 16)
      )
    
    # Print the plot
    print(p)
  }
}

Health and Well-being

Health and Well-being (jkl_health) show how health status significantly affects economic productivity and quality of life. Poor health can lead to increased healthcare costs and reduced labor market participation. These variables are essential for understanding the economic impact of health and designing health-related economic policies.

key_variable <- "jkl_health"
variables_to_pair <- c("jkl_country", "jkl_urban_dv", "jkl_sex_dv", "jkl_ethn_dv")

for (var in variables_to_pair) {
  count_table <- table(data_factors[[key_variable]], data_factors[[var]])
  
  prop_table <- prop.table(count_table, 1)
  prop_df <- as.data.frame(prop_table)
  
  names(prop_df) <- c("KeyVariable", "PairedVariable", "Proportion")
  
  plot <- ggplot(prop_df, aes(x = KeyVariable, y = Proportion, fill = PairedVariable)) +
    geom_bar(stat = "identity", position = "fill") +
    geom_hline(yintercept = 0.5, linetype = "dashed", color = "red") +
    labs(title = paste("Proportion of", var, "within", key_variable),
         x = key_variable, y = "Proportion") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  print(plot)
}

# List of key variables
key_variables <- "jkl_health"

# Loop over each key variable
for (key_variable in key_variables) {
  # Filter out 'inapplicable' category and calculate the proportions
  data_proportion <- data %>%
    filter(!!sym(key_variable) != "inapplicable") %>%
    group_by(jkl_gor_dv, !!sym(key_variable)) %>%
    summarise(count = n(), .groups = 'drop') %>%
    mutate(proportion = count / sum(count))
  
  # Create the stacked bar chart
  plot <- ggplot(data_proportion, aes(x = jkl_gor_dv, y = proportion, fill = !!sym(key_variable))) +
    geom_bar(stat = "identity", position = "fill") +
    scale_y_continuous(labels = scales::percent_format()) +
    theme_minimal() +
    labs(
      title = paste("Proportion of", key_variable, "within Regions in the UK"),
      x = "Region",
      y = "Proportion",
      fill = key_variable
    ) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
  
  # Print the plot
  print(plot)
}

# List of key variables
key_variables <- "jkl_health"

# Print unique values and column names for debugging
cat("Unique values in shapefile (RGN21NM):\n")
## Unique values in shapefile (RGN21NM):
print(unique(uk_shapefile_simplified$RGN21NM))
## [1] "North East"               "North West"              
## [3] "Yorkshire and The Humber" "East Midlands"           
## [5] "West Midlands"            "East of England"         
## [7] "London"                   "South East"              
## [9] "South West"
cat("Unique values in data (jkl_gor_dv):\n")
## Unique values in data (jkl_gor_dv):
print(unique(data$jkl_gor_dv))
##  [1] "North East"               "West Midlands"           
##  [3] "North West"               "Yorkshire and The Humber"
##  [5] "Scotland"                 "East Midlands"           
##  [7] "London"                   "South East"              
##  [9] "South West"               "East of England"         
## [11] "Wales"                    "Northern Ireland"        
## [13] "missing"
cat("Column names in the shapefile:\n")
## Column names in the shapefile:
print(names(uk_shapefile_simplified))
## [1] "RGN21CD"  "BNG_N"    "RGN21NM"  "BNG_E"    "LONG"     "LAT"      "GlobalID"
## [8] "geometry"
cat("Column names in your data:\n")
## Column names in your data:
print(names(data))
##  [1] "pidp"              "jkl_hidp"          "j_hidp"           
##  [4] "k_hidp"            "l_hidp"            "jkl_pno"          
##  [7] "jkl_mnpno"         "jkl_fnpno"         "jkl_mnpid"        
## [10] "jkl_fnpid"         "jkl_mnspno"        "jkl_fnspno"       
## [13] "jkl_mnspid"        "jkl_fnspid"        "jkl_grmpno"       
## [16] "jkl_grfpno"        "jkl_childpno"      "jkl_country"      
## [19] "jkl_gor_dv"        "jkl_urban_dv"      "jkl_sex_dv"       
## [22] "jkl_mastat_dv"     "jkl_jbstat"        "jkl_ethn_dv"      
## [25] "bornuk_dv"         "jkl_jbsoc00_cc"    "jkl_jbnssec8_dv"  
## [28] "jkl_jbnssec5_dv"   "jkl_jbnssec3_dv"   "jkl_fimnnet_dv"   
## [31] "jkl_health"        "jkl_nkids_dv"      "jkl_hhtype_dv"    
## [34] "jkl_tenure_dv"     "jkl_fihhmnnet1_dv"
# Define the regions present in the shapefile
regions_in_shapefile <- unique(uk_shapefile_simplified$RGN21NM)

# Filter the data to include only the regions present in the shapefile
filtered_data <- data %>%
  filter(jkl_gor_dv %in% regions_in_shapefile)

# Loop over each key variable
for (key_variable in key_variables) {
  # Remove 'inapplicable' category
  filtered_data_variable <- filtered_data %>%
    filter(!!sym(key_variable) != "inapplicable")
  
  # Merge simplified shapefile with the filtered data for the current key variable
  uk_data_filtered <- uk_shapefile_simplified %>%
    left_join(filtered_data_variable, by = c("RGN21NM" = "jkl_gor_dv"))
  
  # Plot the map for the current key variable
  plot <- ggplot(data = uk_data_filtered) +
    geom_sf(aes_string(fill = key_variable)) +
    scale_fill_viridis_d() +
    theme_minimal() +
    labs(
      title = paste("Proportion of", key_variable, "in Different Regions in the UK"),
      fill = key_variable
    )
  
  # Print the plot
  print(plot)
}

# List of key variables for pie charts
key_variables <- "jkl_health"

# Loop over each key variable
for (key_variable in key_variables) {
  # Summarize the data to get proportions
  data_summary <- data %>%
    group_by(!!sym(key_variable), jkl_ethn_dv) %>%
    summarise(count = n(), .groups = 'drop') %>%
    mutate(proportion = count / sum(count))
  
  # Long format for pie chart 
  data_long <- data_summary %>%
    select(!!sym(key_variable), jkl_ethn_dv, proportion)
  
  # Get unique categories of the current key variable
  categories <- unique(data_long[[key_variable]])
  
  # Generate and print pie charts for each category
  for (category in categories) {
    plot_data <- data_long %>% filter((!!sym(key_variable)) == category)
    
    p <- ggplot(plot_data, aes(x = "", y = proportion, fill = jkl_ethn_dv)) +
      geom_bar(width = 1, stat = "identity") +
      coord_polar("y", start = 0) +
      theme_minimal(base_size = 15) +
      labs(
        title = paste("Proportion of jkl_ethn_dv within", category, "of", key_variable),
        fill = "Ethnic Group",
        y = "Proportion",
        x = ""
      ) +
      theme(
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_text(size = 16)
      )
    
    # Print the plot
    print(p)
  }
}

Household Composition and Size & Housing Tenure

Household Composition and Size (jkl_hhsize, jkl_nkids_dv, jkl_hhtype_dv) affects economic needs and consumption patterns. Larger households or those with more children may have different financial pressures compared to smaller households. These variables help in analyzing the economic needs of different household types.

Housing Tenure (jkl_tenure_dv), which show whether renting or owning affects household expenditure, wealth accumulation, and financial stability. This variable is important for studying housing market trends and the impact of housing policies.

key_variable <- "jkl_tenure_dv"
variables_to_pair <- c("jkl_country", "jkl_urban_dv", "jkl_sex_dv", "jkl_ethn_dv")

for (var in variables_to_pair) {
  count_table <- table(data_factors[[key_variable]], data_factors[[var]])
  
  prop_table <- prop.table(count_table, 1)
  prop_df <- as.data.frame(prop_table)
  
  names(prop_df) <- c("KeyVariable", "PairedVariable", "Proportion")
  
  plot <- ggplot(prop_df, aes(x = KeyVariable, y = Proportion, fill = PairedVariable)) +
    geom_bar(stat = "identity", position = "fill") +
    geom_hline(yintercept = 0.5, linetype = "dashed", color = "red") +
    labs(title = paste("Proportion of", var, "within", key_variable),
         x = key_variable, y = "Proportion") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  print(plot)
}

# List of key variables
key_variables <- "jkl_tenure_dv"

# Loop over each key variable
for (key_variable in key_variables) {
  # Filter out 'inapplicable' category and calculate the proportions
  data_proportion <- data %>%
    filter(!!sym(key_variable) != "inapplicable") %>%
    group_by(jkl_gor_dv, !!sym(key_variable)) %>%
    summarise(count = n(), .groups = 'drop') %>%
    mutate(proportion = count / sum(count))
  
  # Create the stacked bar chart
  plot <- ggplot(data_proportion, aes(x = jkl_gor_dv, y = proportion, fill = !!sym(key_variable))) +
    geom_bar(stat = "identity", position = "fill") +
    scale_y_continuous(labels = scales::percent_format()) +
    theme_minimal() +
    labs(
      title = paste("Proportion of", key_variable, "within Regions in the UK"),
      x = "Region",
      y = "Proportion",
      fill = key_variable
    ) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1)
    )
  
  # Print the plot
  print(plot)
}

# List of key variables
key_variables <- "jkl_tenure_dv"

# Print unique values and column names for debugging
cat("Unique values in shapefile (RGN21NM):\n")
## Unique values in shapefile (RGN21NM):
print(unique(uk_shapefile_simplified$RGN21NM))
## [1] "North East"               "North West"              
## [3] "Yorkshire and The Humber" "East Midlands"           
## [5] "West Midlands"            "East of England"         
## [7] "London"                   "South East"              
## [9] "South West"
cat("Unique values in data (jkl_gor_dv):\n")
## Unique values in data (jkl_gor_dv):
print(unique(data$jkl_gor_dv))
##  [1] "North East"               "West Midlands"           
##  [3] "North West"               "Yorkshire and The Humber"
##  [5] "Scotland"                 "East Midlands"           
##  [7] "London"                   "South East"              
##  [9] "South West"               "East of England"         
## [11] "Wales"                    "Northern Ireland"        
## [13] "missing"
cat("Column names in the shapefile:\n")
## Column names in the shapefile:
print(names(uk_shapefile_simplified))
## [1] "RGN21CD"  "BNG_N"    "RGN21NM"  "BNG_E"    "LONG"     "LAT"      "GlobalID"
## [8] "geometry"
cat("Column names in your data:\n")
## Column names in your data:
print(names(data))
##  [1] "pidp"              "jkl_hidp"          "j_hidp"           
##  [4] "k_hidp"            "l_hidp"            "jkl_pno"          
##  [7] "jkl_mnpno"         "jkl_fnpno"         "jkl_mnpid"        
## [10] "jkl_fnpid"         "jkl_mnspno"        "jkl_fnspno"       
## [13] "jkl_mnspid"        "jkl_fnspid"        "jkl_grmpno"       
## [16] "jkl_grfpno"        "jkl_childpno"      "jkl_country"      
## [19] "jkl_gor_dv"        "jkl_urban_dv"      "jkl_sex_dv"       
## [22] "jkl_mastat_dv"     "jkl_jbstat"        "jkl_ethn_dv"      
## [25] "bornuk_dv"         "jkl_jbsoc00_cc"    "jkl_jbnssec8_dv"  
## [28] "jkl_jbnssec5_dv"   "jkl_jbnssec3_dv"   "jkl_fimnnet_dv"   
## [31] "jkl_health"        "jkl_nkids_dv"      "jkl_hhtype_dv"    
## [34] "jkl_tenure_dv"     "jkl_fihhmnnet1_dv"
# Define the regions present in the shapefile
regions_in_shapefile <- unique(uk_shapefile_simplified$RGN21NM)

# Filter the data to include only the regions present in the shapefile
filtered_data <- data %>%
  filter(jkl_gor_dv %in% regions_in_shapefile)

# Loop over each key variable
for (key_variable in key_variables) {
  # Remove 'inapplicable' category
  filtered_data_variable <- filtered_data %>%
    filter(!!sym(key_variable) != "inapplicable")
  
  # Merge simplified shapefile with the filtered data for the current key variable
  uk_data_filtered <- uk_shapefile_simplified %>%
    left_join(filtered_data_variable, by = c("RGN21NM" = "jkl_gor_dv"))
  
  # Plot the map for the current key variable
  plot <- ggplot(data = uk_data_filtered) +
    geom_sf(aes_string(fill = key_variable)) +
    scale_fill_viridis_d() +
    theme_minimal() +
    labs(
      title = paste("Proportion of", key_variable, "in Different Regions in the UK"),
      fill = key_variable
    )
  
  # Print the plot
  print(plot)
}

# List of key variables for pie charts
key_variables <- "jkl_tenure_dv"

# Loop over each key variable
for (key_variable in key_variables) {
  # Summarize the data to get proportions
  data_summary <- data %>%
    group_by(!!sym(key_variable), jkl_ethn_dv) %>%
    summarise(count = n(), .groups = 'drop') %>%
    mutate(proportion = count / sum(count))
  
  #Long format for pie chart 
  data_long <- data_summary %>%
    select(!!sym(key_variable), jkl_ethn_dv, proportion)
  
  # Get unique categories of the current key variable
  categories <- unique(data_long[[key_variable]])
  
  # Generate and print pie charts for each category
  for (category in categories) {
    plot_data <- data_long %>% filter((!!sym(key_variable)) == category)
    
    p <- ggplot(plot_data, aes(x = "", y = proportion, fill = jkl_ethn_dv)) +
      geom_bar(width = 1, stat = "identity") +
      coord_polar("y", start = 0) +
      theme_minimal(base_size = 15) +
      labs(
        title = paste("Proportion of jkl_ethn_dv within", category, "of", key_variable),
        fill = "Ethnic Group",
        y = "Proportion",
        x = ""
      ) +
      theme(
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_text(size = 16)
      )
    
    # Print the plot
    print(p)
  }
}

Household Income

Net Household Monthly Income (jkl_fihhmnnet1_dv) is a critical measure of economic well-being. It influences consumption, savings, and investment decisions. Analyzing household income helps in understanding economic inequality and the standard of living.

key_variable <- "jkl_fihhmnnet1_dv"
variables_to_pair <- c("jkl_country", "jkl_urban_dv", "jkl_sex_dv", "jkl_ethn_dv")

for (var in variables_to_pair) {
  if (!(var %in% names(data_factors))) {
    warning(paste("The variable", var, "is not present in data_factors. Skipping."))
    next
  }
  
  plot <- ggplot(data_factors, aes_string(x = var, y = key_variable, fill = var)) +
    geom_boxplot() +
    geom_hline(yintercept = median(data_factors[[key_variable]], na.rm = TRUE), linetype = "dashed", color = "red") +
    labs(title = paste("Distribution of", key_variable, "across", var),
         x = var, y = "Net Household Monthly Income") +
    ylim(0, 12500) +  # Set the y-axis limit
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  # Print plot to the R console
  print(plot)
}
## Warning: Removed 182 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 182 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 182 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 182 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Define the numerical key variable
key_variable <- "jkl_fihhmnnet1_dv"

# Print unique values and column names for debugging
cat("Unique values in shapefile (RGN21NM):\n")
## Unique values in shapefile (RGN21NM):
print(unique(uk_shapefile_simplified$RGN21NM))
## [1] "North East"               "North West"              
## [3] "Yorkshire and The Humber" "East Midlands"           
## [5] "West Midlands"            "East of England"         
## [7] "London"                   "South East"              
## [9] "South West"
cat("Unique values in data (jkl_gor_dv):\n")
## Unique values in data (jkl_gor_dv):
print(unique(data$jkl_gor_dv))
##  [1] "North East"               "West Midlands"           
##  [3] "North West"               "Yorkshire and The Humber"
##  [5] "Scotland"                 "East Midlands"           
##  [7] "London"                   "South East"              
##  [9] "South West"               "East of England"         
## [11] "Wales"                    "Northern Ireland"        
## [13] "missing"
cat("Column names in the shapefile:\n")
## Column names in the shapefile:
print(names(uk_shapefile_simplified))
## [1] "RGN21CD"  "BNG_N"    "RGN21NM"  "BNG_E"    "LONG"     "LAT"      "GlobalID"
## [8] "geometry"
cat("Column names in your data:\n")
## Column names in your data:
print(names(data))
##  [1] "pidp"              "jkl_hidp"          "j_hidp"           
##  [4] "k_hidp"            "l_hidp"            "jkl_pno"          
##  [7] "jkl_mnpno"         "jkl_fnpno"         "jkl_mnpid"        
## [10] "jkl_fnpid"         "jkl_mnspno"        "jkl_fnspno"       
## [13] "jkl_mnspid"        "jkl_fnspid"        "jkl_grmpno"       
## [16] "jkl_grfpno"        "jkl_childpno"      "jkl_country"      
## [19] "jkl_gor_dv"        "jkl_urban_dv"      "jkl_sex_dv"       
## [22] "jkl_mastat_dv"     "jkl_jbstat"        "jkl_ethn_dv"      
## [25] "bornuk_dv"         "jkl_jbsoc00_cc"    "jkl_jbnssec8_dv"  
## [28] "jkl_jbnssec5_dv"   "jkl_jbnssec3_dv"   "jkl_fimnnet_dv"   
## [31] "jkl_health"        "jkl_nkids_dv"      "jkl_hhtype_dv"    
## [34] "jkl_tenure_dv"     "jkl_fihhmnnet1_dv"
# Define the regions present in the shapefile
regions_in_shapefile <- unique(uk_shapefile_simplified$RGN21NM)

# Filter the data to include only the regions present in the shapefile
filtered_data <- data %>%
  filter(jkl_gor_dv %in% regions_in_shapefile)

# Bin the numerical variable into categories
filtered_data <- filtered_data %>%
  mutate(income_bin = cut(
    !!sym(key_variable),
    breaks = c(-Inf, 1000, 2000, 3000, 4000, 5000, Inf),
    labels = c("<=1000", "1001-2000", "2001-3000", "3001-4000", "4001-5000", ">5000"),
    right = FALSE
  ))

# Summarize the data to get proportions
data_summary <- filtered_data %>%
  group_by(jkl_gor_dv, income_bin) %>%
  summarise(count = n(), .groups = 'drop') %>%
  mutate(proportion = count / sum(count))

# Merge simplified shapefile with the filtered data
uk_data_filtered <- uk_shapefile_simplified %>%
  left_join(data_summary, by = c("RGN21NM" = "jkl_gor_dv"))

# Plot the map for each income bin
for (bin in unique(uk_data_filtered$income_bin)) {
  plot_data <- uk_data_filtered %>%
    filter(income_bin == bin)
  
  plot <- ggplot(data = plot_data) +
    geom_sf(aes(fill = proportion)) +
    scale_fill_viridis_c() +
    theme_minimal() +
    labs(
      title = paste("Proportion of Income Bin", bin, "in Different Regions in the UK"),
      fill = "Proportion"
    )
  
  # Print the plot
  print(plot)
}

# Define the numerical key variable
key_variable <- "jkl_fihhmnnet1_dv"

# Bin the numerical variable into categories
data <- data %>%
  mutate(income_bin = cut(
    !!sym(key_variable),
    breaks = c(-Inf, 1000, 2000, 3000, 4000, 5000, Inf),
    labels = c("<=1000", "1001-2000", "2001-3000", "3001-4000", "4001-5000", ">5000"),
    right = FALSE
  ))

# Summarize the data to get proportions
data_summary <- data %>%
  group_by(income_bin, jkl_ethn_dv) %>%
  summarise(count = n(), .groups = 'drop') %>%
  mutate(proportion = count / sum(count))

#Long format for pie chart 
data_long <- data_summary %>%
  select(income_bin, jkl_ethn_dv, proportion)

# Get unique categories of the binned income
categories <- unique(data_long$income_bin)

# Generate and print pie charts for each category
for (category in categories) {
  plot_data <- data_long %>% filter(income_bin == category)
  
  p <- ggplot(plot_data, aes(x = "", y = proportion, fill = jkl_ethn_dv)) +
    geom_bar(width = 1, stat = "identity") +
    coord_polar("y", start = 0) +
    theme_minimal(base_size = 15) +
    labs(
      title = paste("Proportion of jkl_ethn_dv within Income Bin", category),
      fill = "Ethnic Group",
      y = "Proportion",
      x = ""
    ) +
    theme(
      axis.text.x = element_blank(),
      axis.ticks = element_blank(),
      plot.title = element_text(size = 16)
    )
  
  # Print the plot
  print(p)
}

Variable interactions

Understanding the relationships between these variables can provide deeper insights into various economic phenomena. Therefore, it is crucial to understand the potential relationships and their economic implications.

Country and Region, Employment Status and Income

Analyzing the relationships between Country and Region variables (jkl_country, jkl_gor_dv) with Employment Status (jkl_jbstat) and Income variables (jkl_fimnnet_dv, jkl_fimnlabnet_dv, jkl_fihhmnnet1_dv) can reveal geographic disparities in employment opportunities and income levels. For instance, some regions may have higher unemployment rates and lower income levels, indicating the need for regional economic development programs or targeted employment policies.

England
# Filter data for England
england_data <- data %>%
  filter(jkl_country == "England")

# Divide the categories into two subsets
categories <- unique(england_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
england_data_1 <- england_data %>%
  filter(jkl_jbstat %in% categories_1)

england_data_2 <- england_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(england_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in England (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 110000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(england_data_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in England (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 20000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

print(plot2)

# Calculate summary statistics
summary_statistics <- england_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 16 × 9
##    jkl_jbstat                count  mean median    sd     min    max    Q1    Q3
##    <chr>                     <int> <dbl>  <dbl> <dbl>   <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else         77 2857.  2712. 2103.     0   8.99e3 1128. 3850.
##  2 Family care or home         332 3645.  2939. 2949.     0   2.54e4 1761. 4724.
##  3 Full-time student            89 1942.  1633. 1610.     0   6.02e3  633. 2685.
##  4 Govt training scheme          1 2722   2722    NA   2722   2.72e3 2722  2722 
##  5 LT sick or disabled         497 2302.  1869. 1802.     0   2.10e4 1302. 2871.
##  6 On apprenticeship             6 2502.  2921  1359.   650   4.12e3 1499. 3257.
##  7 On furlough                 113 3289.  2870. 2279.     0   1.70e4 1783. 4135.
##  8 On maternity leave            8 4195.  2075. 5007.   876.  1.59e4 1595. 4214.
##  9 Paid employment(ft/pt)     5959 4048.  3600  2817. -3306.  1.04e5 2363. 5091.
## 10 Retired                    6151 2947.  2461. 2053.     0   2.31e4 1622. 3659.
## 11 Self employed              1118 4142.  3245. 4620. -3306.  1.04e5 1839. 4995.
## 12 Temporarily laid off/sho…    15 2858.  2388. 2304.    80   7.98e3 1130. 3838.
## 13 Unemployed                  437 2032.  1647. 1975.     0   1.99e4  740. 2766.
## 14 Unpaid, family business      10 3532.  3062. 2220.  1300   8.45e3 1990. 4430.
## 15 don't know                    7 3237.  1138. 4587.     0   9.92e3  272. 5528.
## 16 refusal                      16 6383.  3795. 7148.    24.9 1.99e4 1897. 8049.
Northern Ireland
# Filter data 
Northern_Ireland_data <- data %>%
  filter(jkl_country == "Northern Ireland")

# Divide the categories into two subsets
categories <- unique(Northern_Ireland_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
Northern_Ireland_data_1 <- Northern_Ireland_data %>%
  filter(jkl_jbstat %in% categories_1)

Northern_Ireland_2 <- Northern_Ireland_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(Northern_Ireland_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in Northern Ireland (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 30000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(Northern_Ireland_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in Northern Ireland (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 20000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

print(plot2)

# Calculate summary statistics
summary_statistics <- Northern_Ireland_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 11 × 9
##    jkl_jbstat              count  mean median    sd   min    max    Q1    Q3
##    <chr>                   <int> <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else       11 2722.  1900. 2145.  240   6968. 1077. 4167.
##  2 Family care or home        24 3026.  2564. 1704. 1136.  7957. 1798. 3821.
##  3 Full-time student           1 2173.  2173.   NA  2173.  2173. 2173. 2173.
##  4 LT sick or disabled        88 2199.  1968. 1387.    0   9967. 1285. 2837.
##  5 On furlough                 7 2995.  2757. 1205. 1427.  4626. 2184. 3896.
##  6 On maternity leave          1 1683.  1683.   NA  1683.  1683. 1683. 1683.
##  7 Paid employment(ft/pt)    403 3789.  3362. 2217.    0  21825. 2174. 4957 
##  8 Retired                   504 2781.  2275. 2196.    0  23362. 1504. 3363.
##  9 Self employed              70 3585.  2799. 3269. -794. 21825. 1543. 4686.
## 10 Unemployed                 36 1590.  1090. 1306.    0   4472.  636. 2301.
## 11 Unpaid, family business     1 2481.  2481.   NA  2481.  2481. 2481. 2481.
Scotland
# Filter data 
Scotland_data <- data %>%
  filter(jkl_country == "Scotland")

# Divide the categories into two subsets
categories <- unique(Scotland_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
Scotland_data_1 <- Scotland_data %>%
  filter(jkl_jbstat %in% categories_1)

Scotland_2 <- Scotland_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(Scotland_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in Scotland (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 40000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(Scotland_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in Scotland (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 20000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)

print(plot2)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

# Calculate summary statistics
summary_statistics <- Scotland_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 14 × 9
##    jkl_jbstat             count   mean median     sd    min    max     Q1     Q3
##    <chr>                  <int>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 Doing something else       6  1740.  1635.  1430.   315.  4179.   673.  2150.
##  2 Family care or home       23  3156.  2951.  2070.   141.  8957   1799.  4138.
##  3 Full-time student         21  2886.  1680   4213.   155. 19036.   731.  2611.
##  4 Govt training scheme       1  1635.  1635.    NA   1635.  1635.  1635.  1635.
##  5 LT sick or disabled       80  2400.  1995.  2639.     0  23136.  1218.  2858.
##  6 On furlough               16  2957.  3155.  1718.   200   7386.  1512.  3796.
##  7 On maternity leave         5  3688.  3104.  1659.  2102   6192.  2563.  4476.
##  8 Paid employment(ft/pt)   691  3709.  3312.  2409.     0  30812.  2204.  4727.
##  9 Retired                  779  2740.  2271.  1951.     0  14223.  1501.  3318.
## 10 Self employed            109  3318.  2636.  2313.     0  10658.  1667.  4629.
## 11 Temporarily laid off/…     3 12389.  4464. 16007.  1891. 30812.  3178. 17638.
## 12 Unemployed                34  1660.  1239.  1181.     0   4844.   888.  2377.
## 13 Unpaid, family busine…     1  4710.  4710.    NA   4710.  4710.  4710.  4710.
## 14 refusal                    1 11364. 11364.    NA  11364. 11364. 11364. 11364.
Wales
# Filter data 
Wales_data <- data %>%
  filter(jkl_country == "Wales")

# Divide the categories into two subsets
categories <- unique(Wales_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
Wales_data_1 <- Wales_data %>%
  filter(jkl_jbstat %in% categories_1)

Wales_2 <- Wales_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(Wales_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in Wales (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 40000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(Wales_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in Wales (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 20000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)

print(plot2)

# Calculate summary statistics
summary_statistics <- Wales_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 13 × 9
##    jkl_jbstat                 count  mean median    sd    min    max    Q1    Q3
##    <chr>                      <int> <dbl>  <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else           7 2918.  2712. 1770. 1007.   5780. 1619. 3843.
##  2 Family care or home           20 2817.  3062. 1393.   52.5  5923. 1873. 3531.
##  3 Full-time student              6 2454.  2325. 1543.  933.   4502. 1124. 3518.
##  4 LT sick or disabled           50 2777.  2332. 1415.  784    7087. 1821. 3584.
##  5 On furlough                   10 2941.  2725. 1647.  558.   6757. 2605. 3420.
##  6 Paid employment(ft/pt)       472 3598.  3343. 1906.  498   12391. 2245. 4434.
##  7 Retired                      605 2600.  2279. 1493.    0    8778. 1535. 3403.
##  8 Self employed                 80 3483.  3251. 2141.   52.5 13255. 2119. 4286.
##  9 Temporarily laid off/shor…     1 3562.  3562.   NA  3562.   3562. 3562. 3562.
## 10 Unemployed                    29 1450.  1540.  988.   54.2  4094.  498  2028.
## 11 Unpaid, family business        1 4704.  4704.   NA  4704.   4704. 4704. 4704.
## 12 don't know                     1 1400   1400    NA  1400    1400  1400  1400 
## 13 refusal                        1 3951.  3951.   NA  3951.   3951. 3951. 3951.
North East
# Filter data 
North_East_data <- data %>%
  filter(jkl_gor_dv == "North East")

# Divide the categories into two subsets
categories <- unique(North_East_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
North_East_data_1 <- North_East_data %>%
  filter(jkl_jbstat %in% categories_1)

North_East_2 <- North_East_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(North_East_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in North East (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 40000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(North_East_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in North East (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 20000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)

print(plot2)

# Calculate summary statistics
summary_statistics <- North_East_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 10 × 9
##    jkl_jbstat                count  mean median    sd     min    max    Q1    Q3
##    <chr>                     <int> <dbl>  <dbl> <dbl>   <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else          5 4105.  3627. 3012. 2.5 e+0  7516. 2831. 6551.
##  2 Family care or home          19 3863.  3204. 3695. 5.42e+2 15189. 1848. 3937.
##  3 Full-time student             3 1831.  2176.  597. 1.14e+3  2176. 1659. 2176.
##  4 LT sick or disabled          32 1843.  1807.  764. 6.5 e+1  3204. 1501. 2324.
##  5 On furlough                   4 2315.  2157.  950. 1.48e+3  3464  1563. 2908.
##  6 Paid employment(ft/pt)      273 3491.  3029. 2121. 2.71e+2 15189. 2140  4222.
##  7 Retired                     331 2480.  2267. 1272. 1.67e+1  7942. 1565. 3113.
##  8 Self employed                40 3446.  3078. 1513. 0        8022. 2717. 4474.
##  9 Temporarily laid off/sho…     1 1948.  1948.   NA  1.95e+3  1948. 1948. 1948.
## 10 Unemployed                   20 1737.  1485. 1319. 1.70e-1  5316.  775. 2234.
West Midlands
# Filter data 
West_Midlands_data <- data %>%
  filter(jkl_gor_dv == "West Midlands")

# Divide the categories into two subsets
categories <- unique(West_Midlands_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
West_Midlands_data_1 <- West_Midlands_data %>%
  filter(jkl_jbstat %in% categories_1)

West_Midlands_2 <- West_Midlands_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(West_Midlands_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in West Midlands (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 20000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(West_Midlands_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in West Midlands (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 20000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)

print(plot2)

# Calculate summary statistics
summary_statistics <- West_Midlands_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 11 × 9
##    jkl_jbstat              count  mean median    sd   min    max    Q1    Q3
##    <chr>                   <int> <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else       10 2997.  2390. 2321.  172.  6367. 1320. 5146.
##  2 Family care or home        34 3719.  3147. 2079.  640   8374. 2014. 4834.
##  3 Full-time student          11 1163.   521. 1592.    0   4601.  186. 1038.
##  4 LT sick or disabled        54 2578.  2007. 2129.    0  13241. 1422  3160.
##  5 On furlough                14 4500.  3525. 4072.  622. 16958. 2297. 5207.
##  6 Paid employment(ft/pt)    605 3943.  3544. 2298.    0  13241. 2200  5023.
##  7 Retired                   660 2961.  2474. 1982.  309. 12932. 1625. 3708.
##  8 Self employed              80 3821.  3290. 3015.    0  13538. 1944. 4733.
##  9 Unemployed                 52 1983.  1470. 1950.    0   9457.  763. 2484.
## 10 Unpaid, family business     3 2578.  2106.  954. 1952.  3675. 2029. 2891.
## 11 refusal                     1 3561.  3561.   NA  3561.  3561. 3561. 3561.
North West
# Filter data 
North_West_data <- data %>%
  filter(jkl_gor_dv == "North West")

# Divide the categories into two subsets
categories <- unique(North_West_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
North_West_data_1 <- North_West_data %>%
  filter(jkl_jbstat %in% categories_1)

North_West_2 <- North_West_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(North_West_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in North West (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 30000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(North_West_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in North West (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 20000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)

print(plot2)

# Calculate summary statistics
summary_statistics <- North_West_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 14 × 9
##    jkl_jbstat                  count  mean median    sd   min    max    Q1    Q3
##    <chr>                       <int> <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else           12 2694.  1886. 2233.  677.  7572. 1089. 3579.
##  2 Family care or home            42 2614.  2468. 1351.  228.  7027. 1772. 3102.
##  3 Full-time student              11 1896.  1833. 1135.    0   4442. 1502. 2311.
##  4 LT sick or disabled            75 2145.  1878. 1226.    0   6016. 1331. 2710.
##  5 On apprenticeship               2  851.   851.  284.  650   1052.  751.  952.
##  6 On furlough                    11 2617.  1619. 2959.    0  10471. 1131. 3301.
##  7 Paid employment(ft/pt)        722 3839.  3353. 2459.    0  23588. 2298. 4793.
##  8 Retired                       766 2847.  2414. 1713.    0  13795. 1670. 3570.
##  9 Self employed                 118 3627.  3017. 2796.    0  14735. 1795. 4658.
## 10 Temporarily laid off/short…     1  766.   766.   NA   766.   766.  766.  766.
## 11 Unemployed                     50 1531.   953. 1473.    0   6654.  451. 2215.
## 12 Unpaid, family business         1 1469.  1469.   NA  1469.  1469. 1469. 1469.
## 13 don't know                      1  520    520    NA   520    520   520   520 
## 14 refusal                         1  791.   791.   NA   791.   791.  791.  791.
Yorkshire and The Humber
# Filter data 
Yorkshire_and_The_Humber_data <- data %>%
  filter(jkl_gor_dv == "Yorkshire and The Humber")

# Divide the categories into two subsets
categories <- unique(Yorkshire_and_The_Humber_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
Yorkshire_and_The_Humber_data_1 <- Yorkshire_and_The_Humber_data %>%
  filter(jkl_jbstat %in% categories_1)

Yorkshire_and_The_Humber_2 <- Yorkshire_and_The_Humber_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(Yorkshire_and_The_Humber_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in Yorkshire and The Humber (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 30000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(Yorkshire_and_The_Humber_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in Yorkshire and The Humber (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 20000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

print(plot2)

# Calculate summary statistics
summary_statistics <- Yorkshire_and_The_Humber_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 14 × 9
##    jkl_jbstat               count  mean median     sd     min    max    Q1    Q3
##    <chr>                    <int> <dbl>  <dbl>  <dbl>   <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else         9 2654.  2400.  1971. 0       5.70e3 1000  3997 
##  2 Family care or home         46 3321.  2887.  1865. 8.52e+2 8.03e3 1768. 4632.
##  3 Full-time student           16 2467.  2451.  1873. 1.08e+2 5.52e3  643. 4062.
##  4 LT sick or disabled         68 2002.  1765   1166. 0       6.33e3 1229. 2661.
##  5 On apprenticeship            1 3002.  3002.    NA  3.00e+3 3.00e3 3002. 3002.
##  6 On furlough                 18 3179.  3316.  1571. 1.17e+3 5.56e3 1447. 4560.
##  7 On maternity leave           2 4816.  4816.  1703. 3.61e+3 6.02e3 4214. 5418.
##  8 Paid employment(ft/pt)     614 3688.  3354.  4412. 0       1.04e5 2349. 4591.
##  9 Retired                    669 2687.  2264.  1841. 0       1.78e4 1507. 3419 
## 10 Self employed               95 4258.  2558. 10648. 8.00e-2 1.04e5 1481. 4297 
## 11 Temporarily laid off/sh…     1 7979.  7979.    NA  7.98e+3 7.98e3 7979. 7979.
## 12 Unemployed                  52 2248.  1961.  1629. 0       5.97e3 1083. 3047.
## 13 don't know                   2 1138.  1138.     0  1.14e+3 1.14e3 1138. 1138.
## 14 refusal                      2 3178.  3178.  1301. 2.26e+3 4.10e3 2718. 3638.
Scotland
# Filter data 
Scotland_data <- data %>%
  filter(jkl_gor_dv == "Scotland")

# Divide the categories into two subsets
categories <- unique(Scotland_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
Scotland_data_1 <- Scotland_data %>%
  filter(jkl_jbstat %in% categories_1)

Scotland_2 <- Scotland_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(Scotland_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in  Scotland (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 40000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(Scotland_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in Scotland (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 40000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)

print(plot2)

# Calculate summary statistics
summary_statistics <-Scotland_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 14 × 9
##    jkl_jbstat             count   mean median     sd    min    max     Q1     Q3
##    <chr>                  <int>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
##  1 Doing something else       6  1740.  1635.  1430.   315.  4179.   673.  2150.
##  2 Family care or home       23  3156.  2951.  2070.   141.  8957   1799.  4138.
##  3 Full-time student         21  2886.  1680   4213.   155. 19036.   731.  2611.
##  4 Govt training scheme       1  1635.  1635.    NA   1635.  1635.  1635.  1635.
##  5 LT sick or disabled       80  2400.  1995.  2639.     0  23136.  1218.  2858.
##  6 On furlough               16  2957.  3155.  1718.   200   7386.  1512.  3796.
##  7 On maternity leave         5  3688.  3104.  1659.  2102   6192.  2563.  4476.
##  8 Paid employment(ft/pt)   691  3709.  3312.  2409.     0  30812.  2204.  4727.
##  9 Retired                  779  2740.  2271.  1951.     0  14223.  1501.  3318.
## 10 Self employed            109  3318.  2636.  2313.     0  10658.  1667.  4629.
## 11 Temporarily laid off/…     3 12389.  4464. 16007.  1891. 30812.  3178. 17638.
## 12 Unemployed                34  1660.  1239.  1181.     0   4844.   888.  2377.
## 13 Unpaid, family busine…     1  4710.  4710.    NA   4710.  4710.  4710.  4710.
## 14 refusal                    1 11364. 11364.    NA  11364. 11364. 11364. 11364.
East Midlands
# Filter data 
East_Midlands_data <- data %>%
  filter(jkl_gor_dv == "East Midlands")

# Divide the categories into two subsets
categories <- unique(East_Midlands_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
East_Midlands_data_1 <- East_Midlands_data %>%
  filter(jkl_jbstat %in% categories_1)

East_Midlands_2 <- East_Midlands_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(East_Midlands_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in  East Midlands (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 40000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(East_Midlands_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in East Midlands (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 15000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)

print(plot2)

# Calculate summary statistics
summary_statistics <-East_Midlands_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 12 × 9
##    jkl_jbstat                 count  mean median    sd    min    max    Q1    Q3
##    <chr>                      <int> <dbl>  <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else           8 1879.  1080. 1696.  170    4560.  742. 3238.
##  2 Family care or home           26 3116.  2434. 2466.   16.1 11547. 1564. 4226.
##  3 Full-time student              6 2046.  1617.  939. 1200    3682. 1533. 2397.
##  4 LT sick or disabled           49 2501.  1861. 2320.  260   16112. 1330  3017.
##  5 On furlough                    5 2832.  2798. 1066. 1776.   4517. 2082. 2989.
##  6 Paid employment(ft/pt)       598 3902.  3413. 2916.    0   38629. 2196. 4849.
##  7 Retired                      605 2754.  2397. 1920.    0   20872. 1596. 3428.
##  8 Self employed                 99 4056.  3133  4409.   80.0 38629. 1685. 5266.
##  9 Temporarily laid off/shor…     1 5651.  5651.   NA  5651.   5651. 5651. 5651.
## 10 Unemployed                    53 1679.  1537. 1276.    0    4988.  712. 2223.
## 11 Unpaid, family business        2 5081.  5081.  604. 4654.   5508. 4867. 5294.
## 12 don't know                     1    0      0    NA     0       0     0     0
London
# Filter data 
London_data <- data %>%
  filter(jkl_gor_dv == "London")

# Divide the categories into two subsets
categories <- unique(London_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
London_data_1 <- London_data %>%
  filter(jkl_jbstat %in% categories_1)

London_2 <- London_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(London_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in  London (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 40000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(London_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in London (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 40000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)

print(plot2)

# Calculate summary statistics
summary_statistics <-London_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 15 × 9
##    jkl_jbstat                count  mean median    sd    min    max    Q1     Q3
##    <chr>                     <int> <dbl>  <dbl> <dbl>  <dbl>  <dbl> <dbl>  <dbl>
##  1 Doing something else          9 3442.  3491. 2423.   15.2  8294. 2050.  4550.
##  2 Family care or home          64 4416.  3752. 4092.    0   25357. 1830.  5291.
##  3 Full-time student            13 1921.  1005. 2044.    0    6016.  225.  4104.
##  4 Govt training scheme          1 2722   2722    NA  2722    2722  2722   2722 
##  5 LT sick or disabled          75 2578.  1707. 2849.    0   21001. 1204.  2999.
##  6 On furlough                  25 3503.  3183. 2208. 1185.  10185. 2009.  4241.
##  7 On maternity leave            1 1299   1299    NA  1299    1299  1299   1299 
##  8 Paid employment(ft/pt)      875 4610.  4086. 2920.    0   24474. 2533   5984.
##  9 Retired                     581 3329.  2525  2629.    0   21001. 1633.  4317.
## 10 Self employed               195 4952.  3758. 4360.    0   25357. 1919.  6357.
## 11 Temporarily laid off/sho…     5 1865.  2219. 1243.   80    3183. 1170   2671.
## 12 Unemployed                   97 2402.  1813. 2779.    0   19854.  665   3221.
## 13 Unpaid, family business       2 2529.  2529. 1738. 1300    3758. 1915.  3144.
## 14 don't know                    3 6620.  9918. 5712.   24.9  9918. 4971.  9918.
## 15 refusal                       8 9185.  5838. 9147.   24.9 19854. 1624. 19854.
South East
# Filter data 
South_East_data <- data %>%
  filter(jkl_gor_dv == "South East")

# Divide the categories into two subsets
categories <- unique(South_East_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
South_East_data_1 <- South_East_data %>%
  filter(jkl_jbstat %in% categories_1)

South_East_2 <- South_East_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(South_East_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in  South East (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 30000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(South_East_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in South East (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 50000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

print(plot2)

# Calculate summary statistics
summary_statistics <-South_East_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 14 × 9
##    jkl_jbstat                count  mean median    sd     min    max    Q1    Q3
##    <chr>                     <int> <dbl>  <dbl> <dbl>   <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else         13 3028.  2732. 2061.  0       8992. 2117. 3150.
##  2 Family care or home          44 3911.  3118. 3111.  0      15111. 1755. 4935.
##  3 Full-time student             7 1985.  2040. 1579.  0       5024. 1152. 2265.
##  4 LT sick or disabled          53 2343.  1970. 1381.  5.42e2  6682. 1415. 3161.
##  5 On apprenticeship             1 3342.  3342.   NA   3.34e3  3342. 3342. 3342.
##  6 On furlough                  17 3089.  3122. 1126.  7.24e2  5402. 2752. 3606.
##  7 On maternity leave            5 4525.  2025. 6381.  8.76e2 15906. 1694  2126.
##  8 Paid employment(ft/pt)      981 4287.  3831. 2637. -3.31e3 18698. 2508. 5498.
##  9 Retired                    1047 3275.  2804. 2375.  3.84e0 19684. 1726. 3980.
## 10 Self employed               203 4239.  3658. 3295. -3.31e3 19684. 2055. 5229.
## 11 Temporarily laid off/sho…     2 3441.  3441. 1489.  2.39e3  4494. 2914. 3967.
## 12 Unemployed                   50 2213.  1377. 2236.  0       9767.  743. 3492.
## 13 Unpaid, family business       1 2448.  2448.   NA   2.45e3  2448. 2448. 2448.
## 14 refusal                       1 1169.  1169.   NA   1.17e3  1169. 1169. 1169.
South West
# Filter data 
South_West_data <- data %>%
  filter(jkl_gor_dv == "South West")

# Divide the categories into two subsets
categories <- unique(South_West_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
South_West_data_1 <- South_West_data %>%
  filter(jkl_jbstat %in% categories_1)

South_West_2 <- South_West_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(South_West_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in  South West (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 30000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(South_West_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in South West (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 30000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)

print(plot2)

# Calculate summary statistics
summary_statistics <-South_West_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 13 × 9
##    jkl_jbstat                 count  mean median    sd    min    max    Q1    Q3
##    <chr>                      <int> <dbl>  <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else           8 3106.  3353. 1475. 6.8 e2  5653. 2315. 3674.
##  2 Family care or home           25 3473.  2753. 2819. 0      10526. 1296. 5175.
##  3 Full-time student             12 1741.  1091. 1691. 0       5054.  471. 2715.
##  4 LT sick or disabled           47 2550.  2023. 1457. 5.45e2  6549. 1625. 2897.
##  5 On apprenticeship              2 3482.  3482.  908. 2.84e3  4124. 3161. 3803.
##  6 On furlough                   11 2914.  3023. 1340. 9.58e2  4982. 1765. 3774.
##  7 Paid employment(ft/pt)       653 3927.  3447. 2460. 0      23920. 2300  4980.
##  8 Retired                      748 2838.  2474. 1898. 2.92e0 23125. 1610. 3476.
##  9 Self employed                155 4128.  2831. 4167. 4.17e1 23125. 1728. 4672.
## 10 Temporarily laid off/shor…     4 2580.  2089. 2633. 8.38e1  6057.  838. 3831.
## 11 Unemployed                    35 1793.  1908.  829. 3.23e2  3597. 1130. 2329.
## 12 Unpaid, family business        1 8451.  8451.   NA  8.45e3  8451. 8451. 8451.
## 13 refusal                        1 9819.  9819.   NA  9.82e3  9819. 9819. 9819.
East of England
# Filter data 
East_of_England_data <- data %>%
  filter(jkl_gor_dv == "East of England")

# Divide the categories into two subsets
categories <- unique(East_of_England_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
East_of_England_data_1 <- East_of_England_data %>%
  filter(jkl_jbstat %in% categories_1)

East_of_England_2 <- East_of_England_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(East_of_England_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in  East of England (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 30000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(East_of_England_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in East of England (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 30000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)

print(plot2)

# Calculate summary statistics
summary_statistics <-East_of_England_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 10 × 9
##    jkl_jbstat             count  mean median    sd     min    max    Q1    Q3
##    <chr>                  <int> <dbl>  <dbl> <dbl>   <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else       3 1012.  1274.  909.    0     1761.  637. 1517.
##  2 Family care or home       32 3917.  2495. 3326.   60    14096. 1804. 5020.
##  3 Full-time student         10 2217.  1961. 1606.  249.    5087. 1127. 2689.
##  4 LT sick or disabled       44 2018.  1871. 1186.  414.    5059. 1094. 2506.
##  5 On furlough                8 3390.  3120. 2388.  573.    8238. 1726. 4203.
##  6 Paid employment(ft/pt)   638 4094.  3899. 2069.    0    12930. 2495. 5278.
##  7 Retired                  744 2988.  2450. 2046.    0    15230. 1598. 3711.
##  8 Self employed            133 3665.  3453. 2434.    1.67 14096. 1904. 4772.
##  9 Unemployed                28 2181.  1920. 1885.    5.57  7156.  792. 3087.
## 10 refusal                    2 3478.  3478.  781. 2925.    4030. 3201. 3754.
Wales
# Filter data 
Wales_data <- data %>%
  filter(jkl_gor_dv == "Wales")

# Divide the categories into two subsets
categories <- unique(Wales_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
Wales_data_1 <- Wales_data %>%
  filter(jkl_jbstat %in% categories_1)

Wales_2 <- Wales_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(Wales_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in  Wales (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 20000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(Wales_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in Wales (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 20000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)

print(plot2)

# Calculate summary statistics
summary_statistics <-Wales_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 13 × 9
##    jkl_jbstat                 count  mean median    sd    min    max    Q1    Q3
##    <chr>                      <int> <dbl>  <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else           7 2918.  2712. 1770. 1007.   5780. 1619. 3843.
##  2 Family care or home           20 2817.  3062. 1393.   52.5  5923. 1873. 3531.
##  3 Full-time student              6 2454.  2325. 1543.  933.   4502. 1124. 3518.
##  4 LT sick or disabled           50 2777.  2332. 1415.  784    7087. 1821. 3584.
##  5 On furlough                   10 2941.  2725. 1647.  558.   6757. 2605. 3420.
##  6 Paid employment(ft/pt)       472 3598.  3343. 1906.  498   12391. 2245. 4434.
##  7 Retired                      605 2600.  2279. 1493.    0    8778. 1535. 3403.
##  8 Self employed                 80 3483.  3251. 2141.   52.5 13255. 2119. 4286.
##  9 Temporarily laid off/shor…     1 3562.  3562.   NA  3562.   3562. 3562. 3562.
## 10 Unemployed                    29 1450.  1540.  988.   54.2  4094.  498  2028.
## 11 Unpaid, family business        1 4704.  4704.   NA  4704.   4704. 4704. 4704.
## 12 don't know                     1 1400   1400    NA  1400    1400  1400  1400 
## 13 refusal                        1 3951.  3951.   NA  3951.   3951. 3951. 3951.
Northern Ireland
# Filter data 
Northern_Ireland_data <- data %>%
  filter(jkl_gor_dv == "Northern Ireland")

# Divide the categories into two subsets
categories <- unique(Northern_Ireland_data$jkl_jbstat)
half <- ceiling(length(categories) / 2)
categories_1 <- categories[1:half]
categories_2 <- categories[(half + 1):length(categories)]

# Subset data
Northern_Ireland_data_1 <- Northern_Ireland_data %>%
  filter(jkl_jbstat %in% categories_1)

Northern_Ireland_2 <- Northern_Ireland_data %>%
  filter(jkl_jbstat %in% categories_2)

# Generate the first box plot
plot1 <- ggplot(Northern_Ireland_data_1, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in  Northern Ireland (Part 1)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 40000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Generate the second box plot
plot2 <- ggplot(Northern_Ireland_2, aes(x = jkl_jbstat, y = jkl_fihhmnnet1_dv, fill = jkl_jbstat)) +
  geom_boxplot() +
  labs(
    title = 'Income Distribution by Employment Status in Northern Ireland (Part 2)',
    x = 'Employment Status',
    y = 'Total Estimated Net Monthly Income (jkl_fihhmnnet1_dv)',
    fill = 'Employment Status'
  ) +
  scale_y_continuous(
    limits = c(0, 20000), 
    breaks = seq(0, 120000, by = 15000), 
    labels = scales::comma, 
    expand = expansion(mult = c(0, 0.05))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 16, face = "bold"),
    legend.position = "none"  # Hide legend as fill colors already labeled on x-axis
  )

# Print the plots
print(plot1)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

print(plot2)

# Calculate summary statistics
summary_statistics <-Northern_Ireland_data %>%
  group_by(jkl_jbstat) %>%
  summarise(
    count = n(),
    mean = mean(jkl_fihhmnnet1_dv, na.rm = TRUE),
    median = median(jkl_fihhmnnet1_dv, na.rm = TRUE),
    sd = sd(jkl_fihhmnnet1_dv, na.rm = TRUE),
    min = min(jkl_fihhmnnet1_dv, na.rm = TRUE),
    max = max(jkl_fihhmnnet1_dv, na.rm = TRUE),
    Q1 = quantile(jkl_fihhmnnet1_dv, 0.25, na.rm = TRUE),
    Q3 = quantile(jkl_fihhmnnet1_dv, 0.75, na.rm = TRUE)
  )

# Print the summary statistics
print(summary_statistics)
## # A tibble: 11 × 9
##    jkl_jbstat              count  mean median    sd   min    max    Q1    Q3
##    <chr>                   <int> <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>
##  1 Doing something else       11 2722.  1900. 2145.  240   6968. 1077. 4167.
##  2 Family care or home        24 3026.  2564. 1704. 1136.  7957. 1798. 3821.
##  3 Full-time student           1 2173.  2173.   NA  2173.  2173. 2173. 2173.
##  4 LT sick or disabled        88 2199.  1968. 1387.    0   9967. 1285. 2837.
##  5 On furlough                 7 2995.  2757. 1205. 1427.  4626. 2184. 3896.
##  6 On maternity leave          1 1683.  1683.   NA  1683.  1683. 1683. 1683.
##  7 Paid employment(ft/pt)    403 3789.  3362. 2217.    0  21825. 2174. 4957 
##  8 Retired                   504 2781.  2275. 2196.    0  23362. 1504. 3363.
##  9 Self employed              70 3585.  2799. 3269. -794. 21825. 1543. 4686.
## 10 Unemployed                 36 1590.  1090. 1306.    0   4472.  636. 2301.
## 11 Unpaid, family business     1 2481.  2481.   NA  2481.  2481. 2481. 2481.
Urban or Rural Area, Household Composition and Income

Examining the relationship between Urban or Rural Area (jkl_urban_dv) with Household Composition (jkl_hhtype_dv) and Income is also crucial. Urban areas typically have higher living costs but more job opportunities, while rural areas might have lower costs but fewer employment options. Understanding this relationship helps in designing policies that address the unique economic needs of urban and rural households, such as urban housing affordability and rural job creation programs.

# Filter out missing category
filtered_data <- data %>% filter(jkl_urban_dv != "missing")

# Define the groups for the household composition
group1 <- c("1 adult under pensionable age, no children", "1 adult, 1 child", "1 adult, 2 or more children")
group2 <- c("1 female, age 60+, no children", "1 male, aged 65+, no children", "2 adults, not a couple, 1 or more children")
group3 <- c("2 adults, not a couple, both under pensionable age, no children", "2 adults, not a couple, one or more over pensionable age, no children", "3 or more adults, 1 or more children, excl. any couples")
group4 <- c("3 or more adults, 1-2 children, incl. at least one couple", "3 or more adults, no children, excl. any couples", "3 or more adults, no children, incl. at least one couple", "Couple 1 or more over pensionable age,no children", "Couple both under pensionable age, no children", "Couple with 1 child", "Couple with 2 children")

# Create plots for each group
plot_group <- function(data, group, title) {
  ggplot(data %>% filter(jkl_hhtype_dv %in% group), 
         aes(x = jkl_urban_dv, y = jkl_fimnnet_dv, fill = jkl_hhtype_dv)) +
    geom_boxplot() +
    labs(
      title = title,
      x = 'Urban or Rural Area (jkl_urban_dv)',
      y = 'Total Estimated Net Monthly Income (jkl_fimnnet_dv)',
      fill = 'Household Composition'
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Plotting each group
plot1 <- plot_group(filtered_data, group1, 'Group 1: Single Adults and Single Parents')
plot2 <- plot_group(filtered_data, group2, 'Group 2: Older Adults and Non-couple Adults with Children')
plot3 <- plot_group(filtered_data, group3, 'Group 3: Non-couple Adults and Large Households with Children')
plot4 <- plot_group(filtered_data, group4, 'Group 4: Couples and Large Households without Children')

# Print the plots
print(plot1)

print(plot2)

print(plot3)

print(plot4)

Ethnic Group, Employment Status and Income

The relationship between Ethnic Group (jkl_ethn_dv) and Employment Status and Income helps provide an understanding of how employment status and income vary by ethnic group. This can highlight economic inequalities and potential discrimination in the labor market. This relationship is crucial for developing policies aimed at promoting equal opportunities and reducing economic disparities among different ethnic groups.

# Filter out missing category for ethnic group if needed
filtered_data <- data %>% filter(jkl_ethn_dv != "missing")

# Define the groups for the ethnic groups and employment statuses
ethnic_groups <- list(
  group1 = c("african", "arab", "bangladeshi", "caribbean", "chinese"),
  group2 = c("any other asian background", "any other black background", "any other ethnic group", "any other mixed background", "any other white background"),
  group3 = c("british/english/scottish/welsh/northern irish", "gypsy or irish traveller", "indian", "irish", "pakistani"),
  group4 = c("white and asian", "white and black african", "white and black caribbean")
)

employment_statuses <- list(
  group1 = c("Paid employment(ft/pt)", "Self employed"),
  group2 = c("Unemployed", "Retired"),
  group3 = c("Student", "Homemaker", "On maternity leave", "On furlough", "On apprenticeship", "LT sick or disabled", "Govt training scheme"),
  group4 = c("Family care or home", "Temporarily laid off/short term working", "On other scheme", "Other inactive")
)

# Function to create plots for each combination of ethnic groups and employment statuses
plot_combination <- function(data, ethnic_group, employment_group, title) {
  ggplot(data %>% filter(jkl_ethn_dv %in% ethnic_group & jkl_jbstat %in% employment_group), 
         aes(x = jkl_ethn_dv, y = jkl_fimnnet_dv, fill = jkl_jbstat)) +
    geom_boxplot() +
    labs(
      title = title,
      x = 'Ethnic Group (jkl_ethn_dv)',
      y = 'Total Estimated Net Monthly Income (jkl_fimnnet_dv)',
      fill = 'Employment Status'
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Create a nested loop to generate plots for all combinations
plot_list <- list()
plot_number <- 1

for (i in seq_along(ethnic_groups)) {
  for (j in seq_along(employment_statuses)) {
    title <- paste('Group', plot_number, ': Ethnic Group', i, 'and Employment Status', j)
    plot_list[[plot_number]] <- plot_combination(filtered_data, ethnic_groups[[i]], employment_statuses[[j]], title)
    plot_number <- plot_number + 1
  }
}

# Print the plots
for (plot in plot_list) {
  print(plot)
}

Socio-economic Classification, Income and Employment Status

The relationships between Socio-economic Classification (jkl_jbsoc00_cc, jkl_jbnssec8_dv, jkl_jbnssec5_dv, jkl_jbnssec3_dv) and Income and Employment Status help to understand how social class and occupational status affect income levels and employment opportunities. Higher socio-economic classes generally have better access to high-paying jobs and economic stability. This information is valuable for assessing social mobility and the effectiveness of policies aimed at reducing economic inequality.

# Filter out missing category for NS-SEC if needed
filtered_data <- data %>% filter(jkl_jbnssec3_dv != "missing")

# Define the groups for the employment statuses
group1 <- c("Paid employment(ft/pt)", "Self employed")
group2 <- c("Unemployed", "Retired")
group3 <- c("Student", "Homemaker", "On maternity leave", "On furlough", "On apprenticeship", "LT sick or disabled", "Govt training scheme")
group4 <- c("Family care or home", "Temporarily laid off/short term working", "On other scheme", "Other inactive")

# Create plots for each group
plot_group <- function(data, group, title) {
  ggplot(data %>% filter(jkl_jbstat %in% group), 
         aes(x = jkl_jbnssec3_dv, y = jkl_fimnnet_dv, fill = jkl_jbstat)) +
    geom_boxplot() +
    labs(
      title = title,
      x = 'NS-SEC Three-Class (jkl_jbnssec3_dv)',
      y = 'Total Estimated Net Monthly Income (jkl_fimnnet_dv)',
      fill = 'Employment Status'
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Plotting each group
plot1 <- plot_group(filtered_data, group1, 'Group 1: Paid Employment and Self Employed')
plot2 <- plot_group(filtered_data, group2, 'Group 2: Unemployed and Retired')
plot3 <- plot_group(filtered_data, group3, 'Group 3: Students, Homemakers, and Others')
plot4 <- plot_group(filtered_data, group4, 'Group 4: Family Care, Temporarily Laid Off, and Other Inactive')

# Print the plots
print(plot1)

print(plot2)

print(plot3)

print(plot4)

Health and Well-being, Employment Status and Income

The implications of how Health and Well-being (jkl_sf12mcs_dv, jkl_sf12pcs_dv, jkl_health, jkl_scghq1_dv, jkl_scghq2_dv) and Employment Status and Income interact are also eye-opening. Poor health can lead to lower employment rates and income levels due to reduced work capacity and higher healthcare costs. Analyzing these relationships can highlight the economic impact of health issues and the importance of healthcare policies that support economic productivity and well-being.

# Filter out missing category for health status if needed
filtered_data <- data %>% filter(jkl_health != "missing")

# Define the groups for the employment statuses
group1 <- c("Paid employment(ft/pt)", "Self employed")
group2 <- c("Unemployed", "Retired")
group3 <- c("Student", "Homemaker", "On maternity leave", "On furlough", "On apprenticeship", "LT sick or disabled", "Govt training scheme")
group4 <- c("Family care or home", "Temporarily laid off/short term working", "On other scheme", "Other inactive")

# Create box plots for each group
plot_group <- function(data, group, title) {
  ggplot(data %>% filter(jkl_jbstat %in% group), 
         aes(x = jkl_health, y = jkl_fimnnet_dv, fill = jkl_jbstat)) +
    geom_boxplot() +
    labs(
      title = title,
      x = 'Health Status (jkl_health)',
      y = 'Total Estimated Net Monthly Income (jkl_fimnnet_dv)',
      fill = 'Employment Status'
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Plotting each group
plot1 <- plot_group(filtered_data, group1, 'Group 1: Paid Employment and Self Employed')
plot2 <- plot_group(filtered_data, group2, 'Group 2: Unemployed and Retired')
plot3 <- plot_group(filtered_data, group3, 'Group 3: Students, Homemakers, and Others')
plot4 <- plot_group(filtered_data, group4, 'Group 4: Family Care, Temporarily Laid Off, and Other Inactive')

# Print the plots
print(plot1)

print(plot2)

print(plot3)

print(plot4)

Household Composition, Size and Income

The interaction between Household Composition and Size (jkl_hhsize, jkl_nkids_dv, jkl_hhtype_dv) with Income shows how larger households or those with more children may have different economic needs and financial pressures compared to smaller households. Understanding these relationships can help in designing social support programs, such as child benefits or family tax credits, to ensure economic stability and well-being for different household types.

# Filter out missing category for household type if needed
filtered_data <- data %>% filter(jkl_hhtype_dv != "missing")

# Define the groups for the household types
group1 <- c("1 adult under pensionable age, no children", "1 adult, 1 child", "1 adult, 2 or more children")
group2 <- c("1 female, age 60+, no children", "1 male, aged 65+, no children", "2 adults, not a couple, 1 or more children")
group3 <- c("2 adults, not a couple, both under pensionable age, no children", "2 adults, not a couple, one or more over pensionable age, no children", "3 or more adults, 1 or more children, excl. any couples")
group4 <- c("3 or more adults, 1-2 children, incl. at least one couple", "3 or more adults, no children, excl. any couples", "3 or more adults, no children, incl. at least one couple", "Couple 1 or more over pensionable age, no children", "Couple both under pensionable age, no children", "Couple with 1 child", "Couple with 2 children")

# Create box plots for each group
plot_group <- function(data, group, title) {
  ggplot(data %>% filter(jkl_hhtype_dv %in% group), 
         aes(x = jkl_hhtype_dv, y = jkl_fimnnet_dv, fill = jkl_hhtype_dv)) +
    geom_boxplot() +
    labs(
      title = title,
      x = 'Household Composition (jkl_hhtype_dv)',
      y = 'Total Estimated Net Monthly Income (jkl_fimnnet_dv)',
      fill = 'Household Composition'
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Plotting each group
plot1 <- plot_group(filtered_data, group1, 'Group 1: Single Adults and Single Parents')
plot2 <- plot_group(filtered_data, group2, 'Group 2: Older Adults and Non-couple Adults with Children')
plot3 <- plot_group(filtered_data, group3, 'Group 3: Non-couple Adults and Large Households with Children')
plot4 <- plot_group(filtered_data, group4, 'Group 4: Couples and Large Households without Children')

# Print the plots
print(plot1)

print(plot2)

print(plot3)

print(plot4)

Tenure, Income and Employment Status

The interaction between housing Tenure (jkl_tenure_dv) with Income and Employment Status is crucial, especially since homeownership is often associated with greater financial stability and wealth accumulation, while renting can be more common among lower-income and less stable employment groups. Analyzing this relationship can inform housing policies aimed at promoting homeownership and addressing rental market challenges.

# Filter out missing category for housing tenure if needed
filtered_data <- data %>% filter(jkl_tenure_dv != "missing")

# Define the groups for the employment statuses
employment_statuses <- list(
  group1 = c("Paid employment(ft/pt)", "Self employed"),
  group2 = c("Unemployed", "Retired"),
  group3 = c("Student", "Homemaker", "On maternity leave", "On furlough", "On apprenticeship", "LT sick or disabled", "Govt training scheme"),
  group4 = c("Family care or home", "Temporarily laid off/short term working", "On other scheme", "Other inactive")
)

# Define the groups for the housing tenure
housing_tenures <- list(
  group1 = c("Housing assoc rented", "Local authority rent"),
  group2 = c("Owned outright", "Owned with mortgage"),
  group3 = c("Rented from employer", "Rented private furnished", "Rented private unfurnished"),
  group4 = c("Other", "missing")
)

# Create box plots for each combination of employment status and housing tenure
plot_combination <- function(data, tenure_group, status_group, title) {
  ggplot(data %>% filter(jkl_tenure_dv %in% tenure_group & jkl_jbstat %in% status_group), 
         aes(x = jkl_tenure_dv, y = jkl_fimnnet_dv, fill = jkl_jbstat)) +
    geom_boxplot() +
    labs(
      title = title,
      x = 'Housing Tenure (jkl_tenure_dv)',
      y = 'Total Estimated Net Monthly Income (jkl_fimnnet_dv)',
      fill = 'Employment Status'
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Create and store plots for each combination of groups
plot_list <- list()
plot_number <- 1

for (i in seq_along(housing_tenures)) {
  for (j in seq_along(employment_statuses)) {
    title <- paste('Plot', plot_number, ': Housing Tenure Group', i, 'and Employment Status Group', j)
    plot_list[[plot_number]] <- plot_combination(filtered_data, housing_tenures[[i]], employment_statuses[[j]], title)
    plot_number <- plot_number + 1
  }
}

# Print the plots
for (plot in plot_list) {
  print(plot)
}